Merge "MIPS64: Add java.lang.String.equals intrinsic." am: 34053d1f5a am: 41817d467b
am: f6ea9768d9

* commit 'f6ea9768d9a1b6498d99341d790bc8279f52ed03':
diff --git a/Android.mk b/Android.mk
index fcf70ff..34022ae 100644
--- a/Android.mk
+++ b/Android.mk
@@ -77,6 +77,7 @@
 # product rules
 
 include $(art_path)/runtime/Android.mk
+include $(art_path)/runtime/simulator/Android.mk
 include $(art_path)/compiler/Android.mk
 include $(art_path)/dexdump/Android.mk
 include $(art_path)/dexlist/Android.mk
@@ -97,16 +98,26 @@
 ART_HOST_DEPENDENCIES := \
 	$(ART_HOST_EXECUTABLES) \
 	$(HOST_OUT_JAVA_LIBRARIES)/core-libart-hostdex.jar \
-	$(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
+	$(HOST_OUT_JAVA_LIBRARIES)/core-oj-hostdex.jar \
+	$(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
+	$(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$(ART_HOST_SHLIB_EXTENSION) \
+	$(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkjvm$(ART_HOST_SHLIB_EXTENSION)
 ART_TARGET_DEPENDENCIES := \
 	$(ART_TARGET_EXECUTABLES) \
 	$(TARGET_OUT_JAVA_LIBRARIES)/core-libart.jar \
-	$(TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so
+	$(TARGET_OUT_JAVA_LIBRARIES)/core-oj.jar \
+	$(TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so \
+	$(TARGET_OUT_SHARED_LIBRARIES)/libopenjdk.so \
+	$(TARGET_OUT_SHARED_LIBRARIES)/libopenjdkjvm.so
 ifdef TARGET_2ND_ARCH
 ART_TARGET_DEPENDENCIES += $(2ND_TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so
+ART_TARGET_DEPENDENCIES += $(2ND_TARGET_OUT_SHARED_LIBRARIES)/libopenjdk.so
+ART_TARGET_DEPENDENCIES += $(2ND_TARGET_OUT_SHARED_LIBRARIES)/libopenjdkjvm.so
 endif
 ifdef HOST_2ND_ARCH
 ART_HOST_DEPENDENCIES += $(2ND_HOST_OUT_SHARED_LIBRARIES)/libjavacore.so
+ART_HOST_DEPENDENCIES += $(2ND_HOST_OUT_SHARED_LIBRARIES)/libopenjdk.so
+ART_HOST_DEPENDENCIES += $(2ND_HOST_OUT_SHARED_LIBRARIES)/libopenjdkjvm.so
 endif
 
 ########################################################################
@@ -122,6 +133,16 @@
 include $(art_path)/test/Android.run-test.mk
 include $(art_path)/benchmark/Android.mk
 
+TEST_ART_ADB_ROOT_AND_REMOUNT := \
+    (adb root && \
+     adb wait-for-device remount && \
+     ((adb shell touch /system/testfile && \
+       (adb shell rm /system/testfile || true)) || \
+      (adb disable-verity && \
+       adb reboot && \
+       adb wait-for-device root && \
+       adb wait-for-device remount)))
+
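As a rough C++ illustration of what the TEST_ART_ADB_ROOT_AND_REMOUNT recipe above does (the helper name and the use of std::system are ours, not part of the build): remount, probe whether /system is writable, and fall back to disabling dm-verity and rebooting if it is not.

```cpp
#include <cstdlib>

// Hypothetical sketch; mirrors the shell fallback in the make variable above.
bool RootAndRemount() {
  std::system("adb root");
  std::system("adb wait-for-device remount");
  // Probe writability by creating and deleting a scratch file.
  if (std::system("adb shell touch /system/testfile") == 0) {
    std::system("adb shell rm /system/testfile");
    return true;
  }
  // dm-verity is likely enforcing: disable it, reboot, and remount again.
  return std::system("adb disable-verity") == 0 &&
         std::system("adb reboot") == 0 &&
         std::system("adb wait-for-device root") == 0 &&
         std::system("adb wait-for-device remount") == 0;
}
```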
 # Sync test files to the target, depends upon all things that must be pushed to the target.
 .PHONY: test-art-target-sync
 # Check if we need to sync. In case ART_TEST_ANDROID_ROOT is not empty,
@@ -130,14 +151,16 @@
 ifneq ($(ART_TEST_NO_SYNC),true)
 ifeq ($(ART_TEST_ANDROID_ROOT),)
 test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS)
-	adb root
-	adb wait-for-device remount
+	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
 	adb sync
 else
 test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS)
-	adb root
+	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
 	adb wait-for-device push $(ANDROID_PRODUCT_OUT)/system $(ART_TEST_ANDROID_ROOT)
-	adb push $(ANDROID_PRODUCT_OUT)/data /data
+# Push the contents of the `data` dir into `/data` on the device.  If
+# `/data` already exists on the device, it is not overwritten, but its
+# contents are updated.
+	adb push $(ANDROID_PRODUCT_OUT)/data /
 endif
 endif
 
@@ -374,8 +397,7 @@
 
 .PHONY: oat-target-sync
 oat-target-sync: oat-target
-	adb root
-	adb wait-for-device remount
+	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
 	adb sync
 
 ########################################################################
@@ -393,10 +415,10 @@
 # Rules for building all dependencies for tests.
 
 .PHONY: build-art-host-tests
-build-art-host-tests:   build-art-host $(TEST_ART_RUN_TEST_DEPENDENCIES) $(ART_TEST_HOST_RUN_TEST_DEPENDENCIES) $(ART_TEST_HOST_GTEST_DEPENDENCIES)
+build-art-host-tests:   build-art-host $(TEST_ART_RUN_TEST_DEPENDENCIES) $(ART_TEST_HOST_RUN_TEST_DEPENDENCIES) $(ART_TEST_HOST_GTEST_DEPENDENCIES) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
 
 .PHONY: build-art-target-tests
-build-art-target-tests:   build-art-target $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TEST_ART_TARGET_SYNC_DEPS)
+build-art-target-tests:   build-art-target $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TEST_ART_TARGET_SYNC_DEPS) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
 
 ########################################################################
 # targets to switch back and forth from libdvm to libart
diff --git a/NOTICE b/NOTICE
index d27f6a6..d79b004 100644
--- a/NOTICE
+++ b/NOTICE
@@ -262,5 +262,3 @@
 pyyaml tests        llvm/test/YAMLParser/{*.data, LICENSE.TXT}
 ARM contributions   llvm/lib/Target/ARM/LICENSE.TXT
 md5 contributions   llvm/lib/Support/MD5.cpp llvm/include/llvm/Support/MD5.h
-
--------------------------------------------------------------------
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index cd9d18d..eec471e 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -118,7 +118,8 @@
 ART_TARGET_CLANG_arm := false
 ART_TARGET_CLANG_arm64 :=
 ART_TARGET_CLANG_mips :=
-ART_TARGET_CLANG_mips64 :=
+# b/25928358, illegal instruction on mips64r6 with -O0
+ART_TARGET_CLANG_mips64 := false
 ART_TARGET_CLANG_x86 :=
 ART_TARGET_CLANG_x86_64 :=
 
@@ -205,6 +206,7 @@
 ART_C_INCLUDES := \
   external/gtest/include \
   external/icu/icu4c/source/common \
+  external/lz4/lib \
   external/valgrind/include \
   external/valgrind \
   external/vixl/src \
@@ -357,12 +359,6 @@
 ART_HOST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=default $(art_host_cflags)
 ART_HOST_ASFLAGS += $(art_asflags)
 
-# The latest clang update trips over many of the files in art and never finishes
-# compiling for aarch64 with -O3 (or -O2). Drop back to -O1 while we investigate
-# to stop punishing the build server.
-# Bug: http://b/23256622
-ART_TARGET_CLANG_CFLAGS_arm64 += -O1
-
 ifndef LIBART_IMG_TARGET_BASE_ADDRESS
   $(error LIBART_IMG_TARGET_BASE_ADDRESS unset)
 endif
diff --git a/build/Android.common_path.mk b/build/Android.common_path.mk
index c53479c..7fd2a5a 100644
--- a/build/Android.common_path.mk
+++ b/build/Android.common_path.mk
@@ -80,7 +80,7 @@
 TARGET_CORE_IMG_LOCATION := $(ART_TARGET_TEST_OUT)/core.art
 
 # Jar files for core.art.
-TARGET_CORE_JARS := core-libart conscrypt okhttp bouncycastle
+TARGET_CORE_JARS := core-oj core-libart conscrypt okhttp bouncycastle
 HOST_CORE_JARS := $(addsuffix -hostdex,$(TARGET_CORE_JARS))
 
 HOST_CORE_DEX_LOCATIONS   := $(foreach jar,$(HOST_CORE_JARS),  $(HOST_OUT_JAVA_LIBRARIES)/$(jar).jar)
@@ -91,9 +91,9 @@
 
 ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
 # Classpath for Jack compilation: we need core-oj and core-libart.
-HOST_JACK_CLASSPATH_DEPENDENCIES   := $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart-hostdex,t,COMMON)/classes.jack
-HOST_JACK_CLASSPATH                := $(foreach dep,$(HOST_JACK_CLASSPATH_DEPENDENCIES),$(abspath $(dep)))
-TARGET_JACK_CLASSPATH_DEPENDENCIES := $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart, ,COMMON)/classes.jack
-TARGET_JACK_CLASSPATH              := $(foreach dep,$(TARGET_JACK_CLASSPATH_DEPENDENCIES),$(abspath $(dep)))
+HOST_JACK_CLASSPATH_DEPENDENCIES   := $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj-hostdex,t,COMMON)/classes.jack $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart-hostdex,t,COMMON)/classes.jack
+HOST_JACK_CLASSPATH                := $(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj-hostdex,t,COMMON)/classes.jack):$(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart-hostdex,t,COMMON)/classes.jack)
+TARGET_JACK_CLASSPATH_DEPENDENCIES := $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj, ,COMMON)/classes.jack $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart, ,COMMON)/classes.jack
+TARGET_JACK_CLASSPATH              := $(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj, ,COMMON)/classes.jack):$(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart, ,COMMON)/classes.jack)
 endif
 endif # ART_ANDROID_COMMON_PATH_MK
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index 3b2d1cc..cb6d340 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -201,6 +201,9 @@
 # $(5): library dependencies (host only)
 # $(6): extra include directories
 # $(7): multilib (default: empty), valid values: {,32,64,both})
+# $(8): host prefer 32-bit: {true, false} (default: false).  If argument
+#       `multilib` is explicitly set to 64, ignore the "host prefer 32-bit"
+#       setting and only build a 64-bit executable on host.
 define build-art-multi-executable
   $(foreach debug_flavor,ndebug debug,
     $(foreach target_flavor,host target,
@@ -211,6 +214,7 @@
       art-multi-lib-dependencies-host := $(5)
       art-multi-include-extra := $(6)
       art-multi-multilib := $(7)
+      art-multi-host-prefer-32-bit := $(8)
 
       # Add either -host or -target specific lib dependencies to the lib dependencies.
       art-multi-lib-dependencies += $$(art-multi-lib-dependencies-$(target_flavor))
@@ -223,6 +227,14 @@
       # Build the env guard var name, e.g. ART_BUILD_HOST_NDEBUG.
       art-multi-env-guard := $$(call art-string-to-uppercase,ART_BUILD_$(target_flavor)_$(debug_flavor))
 
+      ifeq ($(target_flavor),host)
+        ifeq ($$(art-multi-host-prefer-32-bit),true)
+          ifneq ($$(art-multi-multilib),64)
+            art-multi-multilib := 32
+          endif
+        endif
+      endif
+
       # Build the art executable only if the corresponding env guard was set.
       ifeq ($$($$(art-multi-env-guard)),true)
         $$(eval $$(call build-art-executable,$$(art-multi-binary-name),$$(art-multi-source-files),$$(art-multi-lib-dependencies),$$(art-multi-include-extra),$(target_flavor),$(debug_flavor),$$(art-multi-multilib)))
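A condensed restatement of the new multilib selection (argument $(8)) as a pure function, to make the precedence explicit; this is a sketch, not code from the tree: an explicit 64-bit-only request wins over the host prefer-32-bit flag.

```cpp
#include <string>

// Returns the effective multilib value for a host build.
// multilib is "", "32", "64", or "both", as documented above.
std::string EffectiveMultilib(bool is_host, bool prefer_32,
                              const std::string& multilib) {
  if (is_host && prefer_32 && multilib != "64") {
    return "32";  // Prefer 32-bit unless 64-bit was explicitly requested.
  }
  return multilib;
}
```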
@@ -236,6 +248,7 @@
       art-multi-lib-dependencies-host :=
       art-multi-include-extra :=
       art-multi-multilib :=
+      art-multi-host-prefer-32-bit :=
       art-multi-env-guard :=
     )
   )
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index dcde5ab..99f7a2a 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -248,6 +248,7 @@
   compiler/elf_writer_test.cc \
   compiler/image_test.cc \
   compiler/jni/jni_compiler_test.cc \
+  compiler/linker/output_stream_test.cc \
   compiler/oat_test.cc \
   compiler/optimizing/bounds_check_elimination_test.cc \
   compiler/optimizing/dominator_test.cc \
@@ -266,7 +267,6 @@
   compiler/optimizing/ssa_test.cc \
   compiler/optimizing/stack_map_test.cc \
   compiler/optimizing/suspend_check_test.cc \
-  compiler/output_stream_test.cc \
   compiler/utils/arena_allocator_test.cc \
   compiler/utils/dedupe_set_test.cc \
   compiler/utils/swap_space_test.cc \
@@ -382,7 +382,7 @@
 LOCAL_MODULE_TAGS := optional
 LOCAL_CPP_EXTENSION := cc
 LOCAL_SRC_FILES := runtime/common_runtime_test.cc compiler/common_compiler_test.cc
-LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/compiler
+LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/cmdline art/compiler
 LOCAL_SHARED_LIBRARIES := libartd libartd-compiler libdl
 LOCAL_STATIC_LIBRARIES += libgtest
 LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
@@ -399,7 +399,7 @@
 LOCAL_CFLAGS := $(ART_HOST_CFLAGS)
 LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
 LOCAL_SRC_FILES := runtime/common_runtime_test.cc compiler/common_compiler_test.cc
-LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/compiler
+LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/cmdline art/compiler
 LOCAL_SHARED_LIBRARIES := libartd libartd-compiler
 LOCAL_STATIC_LIBRARIES := libgtest_host
 LOCAL_LDLIBS += -ldl -lpthread
@@ -441,7 +441,9 @@
     $(foreach file,$(ART_GTEST_$(1)_DEX_DEPS),$(ART_TEST_TARGET_GTEST_$(file)_DEX)) \
     $$(ART_TARGET_NATIVETEST_OUT)/$$(TARGET_$(2)ARCH)/$(1) \
     $$($(2)TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so \
-    $$(TARGET_OUT_JAVA_LIBRARIES)/core-libart.jar
+    $$($(2)TARGET_OUT_SHARED_LIBRARIES)/libopenjdk.so \
+    $$(TARGET_OUT_JAVA_LIBRARIES)/core-libart.jar \
+    $$(TARGET_OUT_JAVA_LIBRARIES)/core-oj.jar
 
 .PHONY: $$(gtest_rule)
 $$(gtest_rule): test-art-target-sync
@@ -483,6 +485,7 @@
   # Dependencies for all host gtests.
   gtest_deps := $$(HOST_CORE_DEX_LOCATIONS) \
     $$($(2)ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$$(ART_HOST_SHLIB_EXTENSION) \
+    $$($(2)ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$$(ART_HOST_SHLIB_EXTENSION) \
     $$(gtest_exe) \
     $$(ART_GTEST_$(1)_HOST_DEPS) \
     $(foreach file,$(ART_GTEST_$(1)_DEX_DEPS),$(ART_TEST_HOST_GTEST_$(file)_DEX))
@@ -542,7 +545,7 @@
   endif
   LOCAL_CPP_EXTENSION := $$(ART_CPP_EXTENSION)
   LOCAL_SRC_FILES := $$(art_gtest_filename)
-  LOCAL_C_INCLUDES += $$(ART_C_INCLUDES) art/runtime $$(art_gtest_extra_c_includes)
+  LOCAL_C_INCLUDES += $$(ART_C_INCLUDES) art/runtime art/cmdline $$(art_gtest_extra_c_includes)
   LOCAL_SHARED_LIBRARIES += libartd $$(art_gtest_extra_shared_libraries) libart-gtest libartd-disassembler
   LOCAL_WHOLE_STATIC_LIBRARIES += libsigchain
 
@@ -564,6 +567,9 @@
     LOCAL_MODULE_PATH_64 := $$(ART_TARGET_NATIVETEST_OUT)/$$(ART_TARGET_ARCH_64)
     LOCAL_MULTILIB := both
     LOCAL_CLANG_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn  # gtest issue
+    # clang fails to compile art/runtime/arch/stub_test.cc for arm64 without -O1
+    # b/26275713
+    LOCAL_CLANG_CFLAGS_arm64 += -O1
     include $$(BUILD_EXECUTABLE)
     library_path :=
     2nd_library_path :=
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 592843e..50600ef 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -112,7 +112,7 @@
 $$(core_image_name): PRIVATE_CORE_IMG_NAME := $$(core_image_name)
 $$(core_image_name): PRIVATE_CORE_OAT_NAME := $$(core_oat_name)
 $$(core_image_name): $$(HOST_CORE_DEX_LOCATIONS) $$(core_dex2oat_dependency)
-	@echo "host dex2oat: $$@ ($$?)"
+	@echo "host dex2oat: $$@"
 	@mkdir -p $$(dir $$@)
 	$$(hide) $(4) $$(DEX2OAT)$(5) --runtime-arg -Xms$(DEX2OAT_IMAGE_XMS) \
 	  --runtime-arg -Xmx$(DEX2OAT_IMAGE_XMX) \
@@ -238,7 +238,7 @@
 $$(core_image_name): PRIVATE_CORE_IMG_NAME := $$(core_image_name)
 $$(core_image_name): PRIVATE_CORE_OAT_NAME := $$(core_oat_name)
 $$(core_image_name): $$(TARGET_CORE_DEX_FILES) $$(core_dex2oat_dependency)
-	@echo "target dex2oat: $$@ ($$?)"
+	@echo "target dex2oat: $$@"
 	@mkdir -p $$(dir $$@)
 	$$(hide) $(4) $$(DEX2OAT)$(5) --runtime-arg -Xms$(DEX2OAT_IMAGE_XMS) \
 	  --runtime-arg -Xmx$(DEX2OAT_IMAGE_XMX) \
diff --git a/cmdline/cmdline.h b/cmdline/cmdline.h
index 2e9f208..4aced5b 100644
--- a/cmdline/cmdline.h
+++ b/cmdline/cmdline.h
@@ -196,6 +196,7 @@
         "  --boot-image=<file.art>: provide the image location for the boot class path.\n"
         "      Do not include the arch as part of the name, it is added automatically.\n"
         "      Example: --boot-image=/system/framework/boot.art\n"
+        "               (specifies /system/framework/<arch>/boot.art as the image file)\n"
         "\n";
     usage += StringPrintf(  // Optional.
         "  --instruction-set=(arm|arm64|mips|mips64|x86|x86_64): for locating the image\n"
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index 34fb790..fe83ba9 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -243,8 +243,8 @@
 TEST_F(CmdlineParserTest, TestLogVerbosity) {
   {
     const char* log_args = "-verbose:"
-        "class,compiler,gc,heap,jdwp,jni,monitor,profiler,signals,startup,third-party-jni,"
-        "threads,verifier";
+        "class,compiler,gc,heap,jdwp,jni,monitor,profiler,signals,simulator,startup,"
+        "third-party-jni,threads,verifier";
 
     LogVerbosity log_verbosity = LogVerbosity();
     log_verbosity.class_linker = true;
@@ -256,6 +256,7 @@
     log_verbosity.monitor = true;
     log_verbosity.profiler = true;
     log_verbosity.signals = true;
+    log_verbosity.simulator = true;
     log_verbosity.startup = true;
     log_verbosity.third_party_jni = true;
     log_verbosity.threads = true;
@@ -458,9 +459,9 @@
   }
   {
     EXPECT_SINGLE_PARSE_VALUE(
-        MemoryKiB(16 * KB), "-Xjitcodecacheinitialcapacity:16K", M::JITCodeCacheInitialCapacity);
+        MemoryKiB(16 * KB), "-Xjitinitialsize:16K", M::JITCodeCacheInitialCapacity);
     EXPECT_SINGLE_PARSE_VALUE(
-        MemoryKiB(16 * MB), "-Xjitcodecacheinitialcapacity:16M", M::JITCodeCacheInitialCapacity);
+        MemoryKiB(16 * MB), "-Xjitmaxsize:16M", M::JITCodeCacheMaxCapacity);
   }
   {
     EXPECT_SINGLE_PARSE_VALUE(12345u, "-Xjitthreshold:12345", M::JITCompileThreshold);
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index c594adb..6c0a0e1 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -18,17 +18,17 @@
 
 #define CMDLINE_NDEBUG 1  // Do not output any debugging information for parsing.
 
-#include "cmdline/memory_representation.h"
-#include "cmdline/detail/cmdline_debug_detail.h"
+#include "memory_representation.h"
+#include "detail/cmdline_debug_detail.h"
 #include "cmdline_type_parser.h"
 
 // Includes for the types that are being specialized
 #include <string>
 #include "unit.h"
 #include "jdwp/jdwp.h"
-#include "runtime/base/logging.h"
-#include "runtime/base/time_utils.h"
-#include "runtime/experimental_flags.h"
+#include "base/logging.h"
+#include "base/time_utils.h"
+#include "experimental_flags.h"
 #include "gc/collector_type.h"
 #include "gc/space/large_object_space.h"
 #include "profiler_options.h"
@@ -606,6 +606,8 @@
         log_verbosity.profiler = true;
       } else if (verbose_options[j] == "signals") {
         log_verbosity.signals = true;
+      } else if (verbose_options[j] == "simulator") {
+        log_verbosity.simulator = true;
       } else if (verbose_options[j] == "startup") {
         log_verbosity.startup = true;
       } else if (verbose_options[j] == "third-party-jni") {
diff --git a/cmdline/detail/cmdline_parse_argument_detail.h b/cmdline/detail/cmdline_parse_argument_detail.h
index 3009b32..4b56804 100644
--- a/cmdline/detail/cmdline_parse_argument_detail.h
+++ b/cmdline/detail/cmdline_parse_argument_detail.h
@@ -25,10 +25,10 @@
 #include <numeric>
 #include <memory>
 
-#include "cmdline/cmdline_parse_result.h"
-#include "cmdline/token_range.h"
-#include "cmdline/unit.h"
-#include "cmdline/cmdline_types.h"
+#include "cmdline_parse_result.h"
+#include "cmdline_types.h"
+#include "token_range.h"
+#include "unit.h"
 
 namespace art {
   // Implementation details for the parser. Do not look inside if you hate templates.
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 42ddfd8..bdd9a84 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -58,6 +58,10 @@
 	driver/compiler_driver.cc \
 	driver/compiler_options.cc \
 	driver/dex_compilation_unit.cc \
+	linker/buffered_output_stream.cc \
+	linker/file_output_stream.cc \
+	linker/output_stream.cc \
+	linker/vector_output_stream.cc \
 	linker/relative_patcher.cc \
 	jit/jit_compiler.cc \
 	jni/quick/calling_convention.cc \
@@ -69,6 +73,7 @@
 	optimizing/code_generator_utils.cc \
 	optimizing/constant_folding.cc \
 	optimizing/dead_code_elimination.cc \
+	optimizing/dex_cache_array_fixups_arm.cc \
 	optimizing/graph_checker.cc \
 	optimizing/graph_visualizer.cc \
 	optimizing/gvn.cc \
@@ -81,6 +86,7 @@
 	optimizing/load_store_elimination.cc \
 	optimizing/locations.cc \
 	optimizing/nodes.cc \
+	optimizing/nodes_arm64.cc \
 	optimizing/optimization.cc \
 	optimizing/optimizing_compiler.cc \
 	optimizing/parallel_move_resolver.cc \
@@ -98,16 +104,12 @@
 	trampolines/trampoline_compiler.cc \
 	utils/assembler.cc \
 	utils/swap_space.cc \
-	buffered_output_stream.cc \
 	compiler.cc \
 	elf_writer.cc \
 	elf_writer_debug.cc \
 	elf_writer_quick.cc \
-	file_output_stream.cc \
 	image_writer.cc \
-	oat_writer.cc \
-	output_stream.cc \
-	vector_output_stream.cc
+	oat_writer.cc
 
 LIBART_COMPILER_SRC_FILES_arm := \
 	dex/quick/arm/assemble_arm.cc \
@@ -219,7 +221,8 @@
   utils/mips/assembler_mips.h
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips64 := \
-  $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips)
+  $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips) \
+  utils/mips64/assembler_mips64.h
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_x86 :=
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_x86_64 := \
@@ -255,9 +258,9 @@
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := libart-compiler
     ifeq ($$(art_static_or_shared), static)
-      LOCAL_STATIC_LIBRARIES += libart
+      LOCAL_STATIC_LIBRARIES += libart liblz4
     else
-      LOCAL_SHARED_LIBRARIES += libart
+      LOCAL_SHARED_LIBRARIES += libart liblz4
     endif
     ifeq ($$(art_target_or_host),target)
       LOCAL_FDO_SUPPORT := true
@@ -265,9 +268,9 @@
   else # debug
     LOCAL_MODULE := libartd-compiler
     ifeq ($$(art_static_or_shared), static)
-      LOCAL_STATIC_LIBRARIES += libartd
+      LOCAL_STATIC_LIBRARIES += libartd liblz4
     else
-      LOCAL_SHARED_LIBRARIES += libartd
+      LOCAL_SHARED_LIBRARIES += libartd liblz4
     endif
   endif
 
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index e6cc50c..c7c1907 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -142,10 +142,7 @@
   mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader);
   CHECK(klass != nullptr) << "Class not found " << class_name;
   size_t pointer_size = class_linker_->GetImagePointerSize();
-  for (auto& m : klass->GetDirectMethods(pointer_size)) {
-    MakeExecutable(&m);
-  }
-  for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+  for (auto& m : klass->GetMethods(pointer_size)) {
     MakeExecutable(&m);
   }
 }
@@ -189,17 +186,29 @@
     }
 
     timer_.reset(new CumulativeLogger("Compilation times"));
-    compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
-                                              verification_results_.get(),
-                                              method_inliner_map_.get(),
-                                              compiler_kind_, instruction_set,
-                                              instruction_set_features_.get(),
-                                              true,
-                                              GetImageClasses(),
-                                              GetCompiledClasses(),
-                                              GetCompiledMethods(),
-                                              2, true, true, "", false, timer_.get(), -1, ""));
+    CreateCompilerDriver(compiler_kind_, instruction_set);
   }
+}
+
+void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind, InstructionSet isa) {
+  compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
+                                            verification_results_.get(),
+                                            method_inliner_map_.get(),
+                                            kind,
+                                            isa,
+                                            instruction_set_features_.get(),
+                                            true,
+                                            GetImageClasses(),
+                                            GetCompiledClasses(),
+                                            GetCompiledMethods(),
+                                            2,
+                                            true,
+                                            true,
+                                            "",
+                                            false,
+                                            timer_.get(),
+                                            -1,
+                                            ""));
   // We typically don't generate an image in unit tests, disable this optimization by default.
   compiler_driver_->SetSupportBootImageFixup(false);
 }
@@ -247,10 +256,7 @@
   mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader);
   CHECK(klass != nullptr) << "Class not found " << class_name;
   auto pointer_size = class_linker_->GetImagePointerSize();
-  for (auto& m : klass->GetDirectMethods(pointer_size)) {
-    CompileMethod(&m);
-  }
-  for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+  for (auto& m : klass->GetMethods(pointer_size)) {
     CompileMethod(&m);
   }
 }
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 7b0e5af..b491946 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -90,6 +90,8 @@
                             const char* method_name, const char* signature)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  void CreateCompilerDriver(Compiler::Kind kind, InstructionSet isa);
+
   void ReserveImageSpace();
 
   void UnreserveImageSpace();
@@ -128,6 +130,7 @@
 #define TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS() \
   if (kUseReadBarrier && GetCompilerKind() == Compiler::kOptimizing) {                    \
     switch (GetInstructionSet()) {                                                        \
+      case kArm64:                                                                        \
       case kThumb2:                                                                       \
       case kX86:                                                                          \
       case kX86_64:                                                                       \
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 15a4ba0..7a93613 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -23,7 +23,7 @@
 
 #include "arch/instruction_set.h"
 #include "base/bit_utils.h"
-#include "length_prefixed_array.h"
+#include "base/length_prefixed_array.h"
 #include "method_reference.h"
 #include "utils/array_ref.h"
 
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index 39f8ee8..18ce563 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -1430,8 +1430,4 @@
                                  method_lowering_infos_.data(), count);
 }
 
-bool MIRGraph::SkipCompilationByName(const std::string& methodname) {
-  return cu_->compiler_driver->SkipCompilation(methodname);
-}
-
 }  // namespace art
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 2da8a98..3191fe9 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -564,11 +564,6 @@
   bool SkipCompilation(std::string* skip_message);
 
   /*
-   * Should we skip the compilation of this method based on its name?
-   */
-  bool SkipCompilationByName(const std::string& methodname);
-
-  /*
    * Parse dex method and add MIR at current insert point.  Returns id (which is
    * actually the index of the method in the m_units_ array).
    */
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index eaf2408..f48947d 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -50,6 +50,23 @@
     true,   // kIntrinsicMinMaxLong
     true,   // kIntrinsicMinMaxFloat
     true,   // kIntrinsicMinMaxDouble
+    true,   // kIntrinsicCos
+    true,   // kIntrinsicSin
+    true,   // kIntrinsicAcos
+    true,   // kIntrinsicAsin
+    true,   // kIntrinsicAtan
+    true,   // kIntrinsicAtan2
+    true,   // kIntrinsicCbrt
+    true,   // kIntrinsicCosh
+    true,   // kIntrinsicExp
+    true,   // kIntrinsicExpm1
+    true,   // kIntrinsicHypot
+    true,   // kIntrinsicLog
+    true,   // kIntrinsicLog10
+    true,   // kIntrinsicNextAfter
+    true,   // kIntrinsicSinh
+    true,   // kIntrinsicTan
+    true,   // kIntrinsicTanh
     true,   // kIntrinsicSqrt
     true,   // kIntrinsicCeil
     true,   // kIntrinsicFloor
@@ -95,6 +112,23 @@
 static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxLong], "MinMaxLong_must_be_static");
 static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], "MinMaxFloat_must_be_static");
 static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], "MinMaxDouble_must_be_static");
+static_assert(kIntrinsicIsStatic[kIntrinsicCos], "Cos must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicSin], "Sin must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicAcos], "Acos must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicAsin], "Asin must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicAtan], "Atan must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicAtan2], "Atan2 must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicCbrt], "Cbrt must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicCosh], "Cosh must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicExp], "Exp must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicExpm1], "Expm1 must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicHypot], "Hypot must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicLog], "Log must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicLog10], "Log10 must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicNextAfter], "NextAfter must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicSinh], "Sinh must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicTan], "Tan must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicTanh], "Tanh must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSqrt], "Sqrt must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicCeil], "Ceil must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicFloor], "Floor must be static");
@@ -196,6 +230,23 @@
     "abs",                   // kNameCacheAbs
     "max",                   // kNameCacheMax
     "min",                   // kNameCacheMin
+    "cos",                   // kNameCacheCos
+    "sin",                   // kNameCacheSin
+    "acos",                  // kNameCacheAcos
+    "asin",                  // kNameCacheAsin
+    "atan",                  // kNameCacheAtan
+    "atan2",                 // kNameCacheAtan2
+    "cbrt",                  // kNameCacheCbrt
+    "cosh",                  // kNameCacheCosh
+    "exp",                   // kNameCacheExp
+    "expm1",                 // kNameCacheExpm1
+    "hypot",                 // kNameCacheHypot
+    "log",                   // kNameCacheLog
+    "log10",                 // kNameCacheLog10
+    "nextAfter",             // kNameCacheNextAfter
+    "sinh",                  // kNameCacheSinh
+    "tan",                   // kNameCacheTan
+    "tanh",                  // kNameCacheTanh
     "sqrt",                  // kNameCacheSqrt
     "ceil",                  // kNameCacheCeil
     "floor",                 // kNameCacheFloor
@@ -425,6 +476,23 @@
     INTRINSIC(JavaLangMath,       Max, DD_D, kIntrinsicMinMaxDouble, kIntrinsicFlagMax),
     INTRINSIC(JavaLangStrictMath, Max, DD_D, kIntrinsicMinMaxDouble, kIntrinsicFlagMax),
 
+    INTRINSIC(JavaLangMath,       Cos, D_D, kIntrinsicCos, 0),
+    INTRINSIC(JavaLangMath,       Sin, D_D, kIntrinsicSin, 0),
+    INTRINSIC(JavaLangMath,       Acos, D_D, kIntrinsicAcos, 0),
+    INTRINSIC(JavaLangMath,       Asin, D_D, kIntrinsicAsin, 0),
+    INTRINSIC(JavaLangMath,       Atan, D_D, kIntrinsicAtan, 0),
+    INTRINSIC(JavaLangMath,       Atan2, DD_D, kIntrinsicAtan2, 0),
+    INTRINSIC(JavaLangMath,       Cbrt, D_D, kIntrinsicCbrt, 0),
+    INTRINSIC(JavaLangMath,       Cosh, D_D, kIntrinsicCosh, 0),
+    INTRINSIC(JavaLangMath,       Exp, D_D, kIntrinsicExp, 0),
+    INTRINSIC(JavaLangMath,       Expm1, D_D, kIntrinsicExpm1, 0),
+    INTRINSIC(JavaLangMath,       Hypot, DD_D, kIntrinsicHypot, 0),
+    INTRINSIC(JavaLangMath,       Log, D_D, kIntrinsicLog, 0),
+    INTRINSIC(JavaLangMath,       Log10, D_D, kIntrinsicLog10, 0),
+    INTRINSIC(JavaLangMath,       NextAfter, DD_D, kIntrinsicNextAfter, 0),
+    INTRINSIC(JavaLangMath,       Sinh, D_D, kIntrinsicSinh, 0),
+    INTRINSIC(JavaLangMath,       Tan, D_D, kIntrinsicTan, 0),
+    INTRINSIC(JavaLangMath,       Tanh, D_D, kIntrinsicTanh, 0),
     INTRINSIC(JavaLangMath,       Sqrt, D_D, kIntrinsicSqrt, 0),
     INTRINSIC(JavaLangStrictMath, Sqrt, D_D, kIntrinsicSqrt, 0),
 
@@ -603,6 +671,25 @@
       return backend->GenInlinedMinMaxFP(info, intrinsic.d.data & kIntrinsicFlagMin, false /* is_double */);
     case kIntrinsicMinMaxDouble:
       return backend->GenInlinedMinMaxFP(info, intrinsic.d.data & kIntrinsicFlagMin, true /* is_double */);
+    case kIntrinsicCos:
+    case kIntrinsicSin:
+    case kIntrinsicAcos:
+    case kIntrinsicAsin:
+    case kIntrinsicAtan:
+    case kIntrinsicAtan2:
+    case kIntrinsicCbrt:
+    case kIntrinsicCosh:
+    case kIntrinsicExp:
+    case kIntrinsicExpm1:
+    case kIntrinsicHypot:
+    case kIntrinsicLog:
+    case kIntrinsicLog10:
+    case kIntrinsicNextAfter:
+    case kIntrinsicSinh:
+    case kIntrinsicTan:
+    case kIntrinsicTanh:
+      // Not implemented in Quick.
+      return false;
     case kIntrinsicSqrt:
       return backend->GenInlinedSqrt(info);
     case kIntrinsicCeil:
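The effect of the new cases, reduced to a minimal stand-alone sketch (enum abbreviated): Quick now recognizes the java.lang.Math intrinsics so its tables stay aligned with Optimizing, but it declines to inline them, so the call falls back to a regular invoke.

```cpp
enum class Intrinsic { kCos, kSin, kSqrt };

// Returning false tells the caller to emit a normal method call.
bool GenInlined(Intrinsic kind) {
  switch (kind) {
    case Intrinsic::kCos:
    case Intrinsic::kSin:
      return false;  // Recognized but not implemented in Quick.
    case Intrinsic::kSqrt:
      return true;   // Stands in for backend->GenInlinedSqrt(info).
  }
  return false;
}
```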
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index 5ce110c..ac70577 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -162,6 +162,23 @@
       kNameCacheAbs,
       kNameCacheMax,
       kNameCacheMin,
+      kNameCacheCos,
+      kNameCacheSin,
+      kNameCacheAcos,
+      kNameCacheAsin,
+      kNameCacheAtan,
+      kNameCacheAtan2,
+      kNameCacheCbrt,
+      kNameCacheCosh,
+      kNameCacheExp,
+      kNameCacheExpm1,
+      kNameCacheHypot,
+      kNameCacheLog,
+      kNameCacheLog10,
+      kNameCacheNextAfter,
+      kNameCacheSinh,
+      kNameCacheTan,
+      kNameCacheTanh,
       kNameCacheSqrt,
       kNameCacheCeil,
       kNameCacheFloor,
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 05dde9f..3260a7a 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -780,14 +780,6 @@
   PassDriverMEOpts pass_driver(GetPreOptPassManager(), GetPostOptPassManager(), &cu);
   pass_driver.Launch();
 
-  /* For non-leaf methods check if we should skip compilation when the profiler is enabled. */
-  if (cu.compiler_driver->ProfilePresent()
-      && !cu.mir_graph->MethodIsLeaf()
-      && cu.mir_graph->SkipCompilationByName(PrettyMethod(method_idx, dex_file))) {
-    cu.EndTiming();
-    return nullptr;
-  }
-
   if (cu.enable_debug & (1 << kDebugDumpCheckStats)) {
     cu.mir_graph->DumpCheckStats();
   }
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index e5d3841..1c2a619 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -508,6 +508,7 @@
   { kX86Lfence, kReg,                 NO_OPERAND,     { 0, 0, 0x0F, 0xAE, 0, 5, 0, 0, false }, "Lfence", "" },
   { kX86Mfence, kReg,                 NO_OPERAND,     { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0, false }, "Mfence", "" },
   { kX86Sfence, kReg,                 NO_OPERAND,     { 0, 0, 0x0F, 0xAE, 0, 7, 0, 0, false }, "Sfence", "" },
+  { kX86LockAdd32MI8, kMemImm,        IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0xF0, 0, 0x83, 0x0, 0x0, 0, 0, 1, false }, "LockAdd32MI8", "[!0r+!1d],!2d" },
 
   EXT_0F_ENCODING_MAP(Imul16,  0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
   EXT_0F_ENCODING_MAP(Imul32,  0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 75f3fef..4ff7993 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -20,7 +20,7 @@
 #include <inttypes.h>
 #include <string>
 
-#include "arch/instruction_set_features.h"
+#include "arch/x86/instruction_set_features_x86.h"
 #include "art_method.h"
 #include "backend_x86.h"
 #include "base/logging.h"
@@ -585,6 +585,8 @@
       case kX86LockCmpxchgAR:
       case kX86LockCmpxchg64M:
       case kX86LockCmpxchg64A:
+      case kX86LockCmpxchg64AR:
+      case kX86LockAdd32MI8:
       case kX86XchgMR:
       case kX86Mfence:
         // Atomic memory instructions provide full barrier.
@@ -598,7 +600,9 @@
 }
 
 bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
-  if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
+  const X86InstructionSetFeatures* features =
+    cu_->compiler_driver->GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
+  if (!features->IsSmp()) {
     return false;
   }
   // Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
@@ -610,20 +614,34 @@
    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
    */
+  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
+  bool use_locked_add = features->PrefersLockedAddSynchronization();
   if (barrier_kind == kAnyAny) {
-    // If no LIR exists already that can be used a barrier, then generate an mfence.
+    // If no LIR exists already that can be used as a barrier, then generate a barrier.
     if (mem_barrier == nullptr) {
-      mem_barrier = NewLIR0(kX86Mfence);
+      if (use_locked_add) {
+        mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+      } else {
+        mem_barrier = NewLIR0(kX86Mfence);
+      }
       ret = true;
     }
 
-    // If last instruction does not provide full barrier, then insert an mfence.
+    // If the last instruction does not provide a full barrier, then insert one.
     if (ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode)) == false) {
-      mem_barrier = NewLIR0(kX86Mfence);
+      if (use_locked_add) {
+        mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+      } else {
+        mem_barrier = NewLIR0(kX86Mfence);
+      }
       ret = true;
     }
   } else if (barrier_kind == kNTStoreStore) {
-      mem_barrier = NewLIR0(kX86Sfence);
+      if (use_locked_add) {
+        mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+      } else {
+        mem_barrier = NewLIR0(kX86Sfence);
+      }
       ret = true;
   }
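For context on kX86LockAdd32MI8 (stand-alone illustration, not ART code): on x86 a locked read-modify-write of the word at the stack pointer acts as a full fence, with the same ordering guarantees the code above previously got from mfence, and it is often cheaper on common microarchitectures.

```cpp
#include <atomic>

inline void FullBarrierViaLockedAdd() {
#if defined(__x86_64__)
  __asm__ __volatile__("lock addl $0, (%%rsp)" ::: "memory", "cc");  // lock add [rsp], 0
#elif defined(__i386__)
  __asm__ __volatile__("lock addl $0, (%%esp)" ::: "memory", "cc");
#else
  std::atomic_thread_fence(std::memory_order_seq_cst);  // Portable fallback.
#endif
}
```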
 
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index d6a6a60..8cd6574 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -606,6 +606,7 @@
                                 // load-from-memory and store-to-memory instructions
   kX86Sfence,                   // memory barrier to serialize all previous
                                 // store-to-memory instructions
+  kX86LockAdd32MI8,             // locked add used to serialize memory instructions
   Binary0fOpCode(kX86Imul16),   // 16bit multiply
   Binary0fOpCode(kX86Imul32),   // 32bit multiply
   Binary0fOpCode(kX86Imul64),   // 64bit multiply
diff --git a/compiler/dex/quick_compiler_callbacks.cc b/compiler/dex/quick_compiler_callbacks.cc
index 03bda78..2532bda 100644
--- a/compiler/dex/quick_compiler_callbacks.cc
+++ b/compiler/dex/quick_compiler_callbacks.cc
@@ -22,14 +22,10 @@
 
 namespace art {
 
-bool QuickCompilerCallbacks::MethodVerified(verifier::MethodVerifier* verifier) {
-  bool result = verification_results_->ProcessVerifiedMethod(verifier);
-  if (result) {
-    MethodReference ref = verifier->GetMethodReference();
-    method_inliner_map_->GetMethodInliner(ref.dex_file)
-        ->AnalyseMethodCode(verifier);
-  }
-  return result;
+void QuickCompilerCallbacks::MethodVerified(verifier::MethodVerifier* verifier) {
+  verification_results_->ProcessVerifiedMethod(verifier);
+  MethodReference ref = verifier->GetMethodReference();
+  method_inliner_map_->GetMethodInliner(ref.dex_file)->AnalyseMethodCode(verifier);
 }
 
 void QuickCompilerCallbacks::ClassRejected(ClassReference ref) {
diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h
index 03bf57b..4f5ea76 100644
--- a/compiler/dex/quick_compiler_callbacks.h
+++ b/compiler/dex/quick_compiler_callbacks.h
@@ -37,7 +37,7 @@
 
     ~QuickCompilerCallbacks() { }
 
-    bool MethodVerified(verifier::MethodVerifier* verifier)
+    void MethodVerified(verifier::MethodVerifier* verifier)
         SHARED_REQUIRES(Locks::mutator_lock_) OVERRIDE;
 
     void ClassRejected(ClassReference ref) OVERRIDE;
diff --git a/compiler/dex/type_inference_test.cc b/compiler/dex/type_inference_test.cc
index 528a18c..e2c0d32 100644
--- a/compiler/dex/type_inference_test.cc
+++ b/compiler/dex/type_inference_test.cc
@@ -253,7 +253,7 @@
         &cu_, cu_.class_loader, cu_.class_linker, *cu_.dex_file, nullptr /* code_item not used */,
         0u /* class_def_idx not used */, 0u /* method_index not used */,
         cu_.access_flags, nullptr /* verified_method not used */,
-        NullHandle<mirror::DexCache>()));
+        ScopedNullHandle<mirror::DexCache>()));
     cu_.mir_graph->current_method_ = 0u;
     code_item_ = static_cast<DexFile::CodeItem*>(
         cu_.arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc));
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 65b0ad6..dd24220 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -44,14 +44,14 @@
   }
 }
 
-bool VerificationResults::ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier) {
+void VerificationResults::ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier) {
   DCHECK(method_verifier != nullptr);
   MethodReference ref = method_verifier->GetMethodReference();
   bool compile = IsCandidateForCompilation(ref, method_verifier->GetAccessFlags());
   const VerifiedMethod* verified_method = VerifiedMethod::Create(method_verifier, compile);
   if (verified_method == nullptr) {
-    // Do not report an error to the verifier. We'll just punt this later.
-    return true;
+    // We'll punt this later.
+    return;
   }
 
   WriterMutexLock mu(Thread::Current(), verified_methods_lock_);
@@ -69,11 +69,10 @@
     // is unsafe to replace the existing one since the JIT may be using it to generate a
     // native GC map.
     delete verified_method;
-    return true;
+    return;
   }
   verified_methods_.Put(ref, verified_method);
   DCHECK(verified_methods_.find(ref) != verified_methods_.end());
-  return true;
 }
 
 const VerifiedMethod* VerificationResults::GetVerifiedMethod(MethodReference ref) {
diff --git a/compiler/dex/verification_results.h b/compiler/dex/verification_results.h
index 9934f6b..da80bf0 100644
--- a/compiler/dex/verification_results.h
+++ b/compiler/dex/verification_results.h
@@ -42,7 +42,7 @@
     explicit VerificationResults(const CompilerOptions* compiler_options);
     ~VerificationResults();
 
-    bool ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier)
+    void ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier)
         SHARED_REQUIRES(Locks::mutator_lock_)
         REQUIRES(!verified_methods_lock_);
 
diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h
index ef10b67..d6961a0 100644
--- a/compiler/driver/compiled_method_storage.h
+++ b/compiler/driver/compiled_method_storage.h
@@ -20,8 +20,8 @@
 #include <iosfwd>
 #include <memory>
 
+#include "base/length_prefixed_array.h"
 #include "base/macros.h"
-#include "length_prefixed_array.h"
 #include "utils/array_ref.h"
 #include "utils/dedupe_set.h"
 #include "utils/swap_space.h"
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 10841e6..0eb3e43 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -264,18 +264,16 @@
     Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
     uint32_t method_idx, InvokeType invoke_type, bool check_incompatible_class_change) {
   DCHECK_EQ(class_loader.Get(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
-  ArtMethod* resolved_method = mUnit->GetClassLinker()->ResolveMethod(
-      *dex_cache->GetDexFile(), method_idx, dex_cache, class_loader, nullptr, invoke_type);
-  DCHECK_EQ(resolved_method == nullptr, soa.Self()->IsExceptionPending());
+  ArtMethod* resolved_method =
+      check_incompatible_class_change
+          ? mUnit->GetClassLinker()->ResolveMethod<ClassLinker::kForceICCECheck>(
+              *dex_cache->GetDexFile(), method_idx, dex_cache, class_loader, nullptr, invoke_type)
+          : mUnit->GetClassLinker()->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+              *dex_cache->GetDexFile(), method_idx, dex_cache, class_loader, nullptr, invoke_type);
   if (UNLIKELY(resolved_method == nullptr)) {
+    DCHECK(soa.Self()->IsExceptionPending());
     // Clean up any exception left by type resolution.
     soa.Self()->ClearException();
-    return nullptr;
-  }
-  if (check_incompatible_class_change &&
-      UNLIKELY(resolved_method->CheckIncompatibleClassChange(invoke_type))) {
-    // Silently return null on incompatible class change.
-    return nullptr;
   }
   return resolved_method;
 }
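The shape of the new template dispatch, reduced to a self-contained sketch (all types here are stand-ins, not ART's): the ICCE check moves inside resolution, selected at compile time, so callers no longer call CheckIncompatibleClassChange afterwards.

```cpp
#include <cstdint>

enum class ResolveMode { kNoICCECheckForCache, kForceICCECheck };
enum class InvokeType { kDirect, kVirtual };

struct Method {
  bool IsIncompatibleClassChange(InvokeType) const { return false; }  // Stub.
};
struct Cache {
  Method* Lookup(uint32_t) { return nullptr; }  // Stub.
};

template <ResolveMode kMode>
Method* Resolve(Cache* cache, uint32_t method_idx, InvokeType type) {
  Method* m = cache->Lookup(method_idx);
  if (kMode == ResolveMode::kForceICCECheck &&
      m != nullptr && m->IsIncompatibleClassChange(type)) {
    return nullptr;  // Silently return null on incompatible class change.
  }
  return m;
}
```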
@@ -361,7 +359,7 @@
     ArtMethod* called_method;
     ClassLinker* class_linker = mUnit->GetClassLinker();
     if (LIKELY(devirt_target->dex_file == mUnit->GetDexFile())) {
-      called_method = class_linker->ResolveMethod(
+      called_method = class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
           *devirt_target->dex_file, devirt_target->dex_method_index, dex_cache, class_loader,
           nullptr, kVirtual);
     } else {
@@ -369,7 +367,7 @@
       auto target_dex_cache(hs.NewHandle(class_linker->RegisterDexFile(
           *devirt_target->dex_file,
           class_linker->GetOrCreateAllocatorForClassLoader(class_loader.Get()))));
-      called_method = class_linker->ResolveMethod(
+      called_method = class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
           *devirt_target->dex_file, devirt_target->dex_method_index, target_dex_cache,
           class_loader, nullptr, kVirtual);
     }
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index e42a737..5630b08 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -45,7 +45,6 @@
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_options.h"
-#include "elf_writer_quick.h"
 #include "jni_internal.h"
 #include "object_lock.h"
 #include "profiler.h"
@@ -77,13 +76,13 @@
 
 static constexpr bool kTimeCompileMethod = !kIsDebugBuild;
 
-// Whether to produce 64-bit ELF files for 64-bit targets.
-static constexpr bool kProduce64BitELFFiles = true;
-
 // Whether classes-to-compile and methods-to-compile are only applied to the boot image, or, when
 // given, to all compilations.
 static constexpr bool kRestrictCompilationFiltersToImage = true;
 
+// Print additional info during profile-guided compilation.
+static constexpr bool kDebugProfileGuidedCompilation = false;
+
 static double Percentage(size_t x, size_t y) {
   return 100.0 * (static_cast<double>(x)) / (static_cast<double>(x + y));
 }
@@ -348,8 +347,7 @@
                                const std::string& dump_cfg_file_name, bool dump_cfg_append,
                                CumulativeLogger* timer, int swap_fd,
                                const std::string& profile_file)
-    : profile_present_(false),
-      compiler_options_(compiler_options),
+    : compiler_options_(compiler_options),
       verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
       compiler_(Compiler::Create(this, compiler_kind)),
@@ -387,12 +385,8 @@
 
   // Read the profile file if one is provided.
   if (!profile_file.empty()) {
-    profile_present_ = profile_file_.LoadFile(profile_file);
-    if (profile_present_) {
-      LOG(INFO) << "Using profile data form file " << profile_file;
-    } else {
-      LOG(INFO) << "Failed to load profile file " << profile_file;
-    }
+    profile_compilation_info_.reset(new ProfileCompilationInfo(profile_file));
+    LOG(INFO) << "Using profile data from file " << profile_file;
   }
 }
 
@@ -573,7 +567,9 @@
         (verified_method->GetEncounteredVerificationFailures() &
             (verifier::VERIFY_ERROR_FORCE_INTERPRETER | verifier::VERIFY_ERROR_LOCKING)) == 0 &&
         // Is eligible for compilation by the methods-to-compile filter.
-        driver->IsMethodToCompile(method_ref);
+        driver->IsMethodToCompile(method_ref) &&
+        driver->ShouldCompileBasedOnProfile(method_ref);
+
     if (compile) {
       // NOTE: if compiler declines to compile this method, it will return null.
       compiled_method = driver->GetCompiler()->Compile(code_item, access_flags, invoke_type,
@@ -770,6 +766,22 @@
   return methods_to_compile_->find(tmp.c_str()) != methods_to_compile_->end();
 }
 
+bool CompilerDriver::ShouldCompileBasedOnProfile(const MethodReference& method_ref) const {
+  if (profile_compilation_info_ == nullptr) {
+    // If profile information is missing, this is not a profile-guided compilation.
+    // Return true, and let the other filters decide if the method should be compiled.
+    return true;
+  }
+  bool result = profile_compilation_info_->ContainsMethod(method_ref);
+
+  if (kDebugProfileGuidedCompilation) {
+    LOG(INFO) << "[ProfileGuidedCompilation] "
+        << (result ? "Compiled" : "Skipped") << " method:"
+        << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file, true);
+  }
+  return result;
+}
+
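Putting the pieces of the new profile flow together (a sketch using only calls visible in this change; variable names are illustrative): the info object is created with the file name at driver construction, Load() runs once the dex files are known (see Compile() below), and ContainsMethod() filters per method.

```cpp
ProfileCompilationInfo info(profile_file);         // Constructed in the driver.
if (!info.Load(dex_files)) {                       // Done at the start of Compile().
  LOG(WARNING) << "Failed to load " << info.GetFilename();
} else {
  bool compile = info.ContainsMethod(method_ref);  // Queried per method.
}
```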
 class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor {
  public:
   ResolveCatchBlockExceptionsClassVisitor(
@@ -778,10 +790,7 @@
 
   virtual bool Visit(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     const auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-    for (auto& m : c->GetVirtualMethods(pointer_size)) {
-      ResolveExceptionsForMethod(&m, pointer_size);
-    }
-    for (auto& m : c->GetDirectMethods(pointer_size)) {
+    for (auto& m : c->GetMethods(pointer_size)) {
       ResolveExceptionsForMethod(&m, pointer_size);
     }
     return true;
@@ -889,8 +898,10 @@
           *dex_file,
           Runtime::Current()->GetLinearAlloc())));
       Handle<mirror::Class> klass(hs2.NewHandle(
-          class_linker->ResolveType(*dex_file, exception_type_idx, dex_cache,
-                                    NullHandle<mirror::ClassLoader>())));
+          class_linker->ResolveType(*dex_file,
+                                    exception_type_idx,
+                                    dex_cache,
+                                    ScopedNullHandle<mirror::ClassLoader>())));
       if (klass.Get() == nullptr) {
         const DexFile::TypeId& type_id = dex_file->GetTypeId(exception_type_idx);
         const char* descriptor = dex_file->GetTypeDescriptor(type_id);
@@ -1114,25 +1125,23 @@
 }
 
 bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx) {
-  if (IsBootImage() &&
-      IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) {
-    {
-      ScopedObjectAccess soa(Thread::Current());
-      mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
-          soa.Self(), dex_file, false);
-      mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
-      if (resolved_class == nullptr) {
-        // Erroneous class.
-        stats_->TypeNotInDexCache();
-        return false;
-      }
-    }
+  bool result = false;
+  if ((IsBootImage() &&
+       IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) ||
+      Runtime::Current()->UseJit()) {
+    ScopedObjectAccess soa(Thread::Current());
+    mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
+        soa.Self(), dex_file, false);
+    mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
+    result = (resolved_class != nullptr);
+  }
+
+  if (result) {
     stats_->TypeInDexCache();
-    return true;
   } else {
     stats_->TypeNotInDexCache();
-    return false;
   }
+  return result;
 }
 
 bool CompilerDriver::CanAssumeStringIsPresentInDexCache(const DexFile& dex_file,
@@ -1140,15 +1149,21 @@
   // See also Compiler::ResolveDexFile
 
   bool result = false;
-  if (IsBootImage()) {
-    // We resolve all const-string strings when building for the image.
+  if (IsBootImage() || Runtime::Current()->UseJit()) {
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
     ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
         soa.Self(), dex_file, false)));
-    class_linker->ResolveString(dex_file, string_idx, dex_cache);
-    result = true;
+    if (IsBootImage()) {
+      // We resolve all const-string strings when building for the image.
+      class_linker->ResolveString(dex_file, string_idx, dex_cache);
+      result = true;
+    } else {
+      // Just check whether the dex cache already has the string.
+      DCHECK(Runtime::Current()->UseJit());
+      result = (dex_cache->GetResolvedString(string_idx) != nullptr);
+    }
   }
   if (result) {
     stats_->StringInDexCache();
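The decision above, condensed (sketch only): boot-image compiles resolve the string eagerly and may assume presence; the JIT may assume presence only if the dex cache already holds the string; plain AOT app compiles assume nothing.

```cpp
bool CanAssumeStringPresent(bool boot_image, bool use_jit, bool already_resolved) {
  if (boot_image) return true;         // Resolved eagerly while building the image.
  if (use_jit) return already_resolved;
  return false;                        // AOT app compile: cannot assume.
}
```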
@@ -1908,7 +1923,7 @@
       }
       if (resolve_fields_and_methods) {
         while (it.HasNextDirectMethod()) {
-          ArtMethod* method = class_linker->ResolveMethod(
+          ArtMethod* method = class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
               dex_file, it.GetMemberIndex(), dex_cache, class_loader, nullptr,
               it.GetMethodInvokeType(class_def));
           if (method == nullptr) {
@@ -1917,7 +1932,7 @@
           it.Next();
         }
         while (it.HasNextVirtualMethod()) {
-          ArtMethod* method = class_linker->ResolveMethod(
+          ArtMethod* method = class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
               dex_file, it.GetMemberIndex(), dex_cache, class_loader, nullptr,
               it.GetMethodInvokeType(class_def));
           if (method == nullptr) {
@@ -2043,6 +2058,7 @@
                                                 dex_cache,
                                                 class_loader,
                                                 &class_def,
+                                                Runtime::Current()->GetCompilerCallbacks(),
                                                 true /* allow soft failures */,
                                                 true /* log hard failures */,
                                                 &error_msg) ==
@@ -2279,6 +2295,16 @@
 
 void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                              ThreadPool* thread_pool, TimingLogger* timings) {
+  if (profile_compilation_info_ != nullptr) {
+    if (!profile_compilation_info_->Load(dex_files)) {
+      LOG(WARNING) << "Failed to load offline profile info from "
+          << profile_compilation_info_->GetFilename()
+          << ". No methods will be compiled";
+    } else if (kDebugProfileGuidedCompilation) {
+      LOG(INFO) << "[ProfileGuidedCompilation] "
+          << profile_compilation_info_->DumpInfo();
+    }
+  }
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
@@ -2516,52 +2542,6 @@
   return freezing_constructor_classes_.count(ClassReference(dex_file, class_def_index)) != 0;
 }
 
-bool CompilerDriver::WriteElf(const std::string& android_root,
-                              bool is_host,
-                              const std::vector<const art::DexFile*>& dex_files,
-                              OatWriter* oat_writer,
-                              art::File* file)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (kProduce64BitELFFiles && Is64BitInstructionSet(GetInstructionSet())) {
-    return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, *this);
-  } else {
-    return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, *this);
-  }
-}
-
-bool CompilerDriver::SkipCompilation(const std::string& method_name) {
-  if (!profile_present_) {
-    return false;
-  }
-  // First find the method in the profile file.
-  ProfileFile::ProfileData data;
-  if (!profile_file_.GetProfileData(&data, method_name)) {
-    // Not in profile, no information can be determined.
-    if (kIsDebugBuild) {
-      VLOG(compiler) << "not compiling " << method_name << " because it's not in the profile";
-    }
-    return true;
-  }
-
-  // Methods that comprise top_k_threshold % of the total samples will be compiled.
-  // Compare against the start of the topK percentage bucket just in case the threshold
-  // falls inside a bucket.
-  bool compile = data.GetTopKUsedPercentage() - data.GetUsedPercent()
-                 <= compiler_options_->GetTopKProfileThreshold();
-  if (kIsDebugBuild) {
-    if (compile) {
-      LOG(INFO) << "compiling method " << method_name << " because its usage is part of top "
-          << data.GetTopKUsedPercentage() << "% with a percent of " << data.GetUsedPercent() << "%"
-          << " (topKThreshold=" << compiler_options_->GetTopKProfileThreshold() << ")";
-    } else {
-      VLOG(compiler) << "not compiling method " << method_name
-          << " because it's not part of leading " << compiler_options_->GetTopKProfileThreshold()
-          << "% samples)";
-    }
-  }
-  return !compile;
-}
-
 std::string CompilerDriver::GetMemoryUsageString(bool extended) const {
   std::ostringstream oss;
   Runtime* const runtime = Runtime::Current();
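Note on the change above: profile gating moves from the old name-based SkipCompilation() to a ProfileCompilationInfo loaded up front in Compile(). A minimal sketch of how a call site might consult the new per-method check (ShouldCompileBasedOnProfile is declared in the header below; the helper and surrounding names here are illustrative only):

    // Hypothetical call site; only ShouldCompileBasedOnProfile() comes from this change.
    bool MaybeCompile(CompilerDriver* driver, const MethodReference& method_ref) {
      if (!driver->ShouldCompileBasedOnProfile(method_ref)) {
        // The profile marks this method as cold; skip AOT compilation.
        return false;
      }
      // ... proceed with compilation of method_ref ...
      return true;
    }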
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index dae785b..f0360ce 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -31,11 +31,11 @@
 #include "compiler.h"
 #include "dex_file.h"
 #include "driver/compiled_method_storage.h"
+#include "jit/offline_profiling_info.h"
 #include "invoke_type.h"
 #include "method_reference.h"
 #include "mirror/class.h"  // For mirror::Class::Status.
 #include "os.h"
-#include "profiler.h"
 #include "runtime.h"
 #include "safe_map.h"
 #include "thread_pool.h"
@@ -59,7 +59,6 @@
 class DexFileToMethodInlinerMap;
 struct InlineIGetIPutData;
 class InstructionSetFeatures;
-class OatWriter;
 class ParallelCompilationManager;
 class ScopedObjectAccess;
 template <class Allocator> class SrcMap;
@@ -148,10 +147,6 @@
     return compiler_.get();
   }
 
-  bool ProfilePresent() const {
-    return profile_present_;
-  }
-
   // Are we compiling and creating an image file?
   bool IsBootImage() const {
     return boot_image_;
@@ -162,16 +157,11 @@
   }
 
   // Generate the trampolines that are invoked by unresolved direct methods.
-  const std::vector<uint8_t>* CreateJniDlsymLookup() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const std::vector<uint8_t>* CreateQuickGenericJniTrampoline() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const std::vector<uint8_t>* CreateQuickImtConflictTrampoline() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const std::vector<uint8_t>* CreateQuickResolutionTrampoline() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const std::vector<uint8_t>* CreateQuickToInterpreterBridge() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
+  const std::vector<uint8_t>* CreateJniDlsymLookup() const;
+  const std::vector<uint8_t>* CreateQuickGenericJniTrampoline() const;
+  const std::vector<uint8_t>* CreateQuickImtConflictTrampoline() const;
+  const std::vector<uint8_t>* CreateQuickResolutionTrampoline() const;
+  const std::vector<uint8_t>* CreateQuickToInterpreterBridge() const;
 
   CompiledClass* GetCompiledClass(ClassReference ref) const
       REQUIRES(!compiled_classes_lock_);
@@ -398,12 +388,6 @@
     support_boot_image_fixup_ = support_boot_image_fixup;
   }
 
-  bool WriteElf(const std::string& android_root,
-                bool is_host,
-                const std::vector<const DexFile*>& dex_files,
-                OatWriter* oat_writer,
-                File* file);
-
   void SetCompilerContext(void* compiler_context) {
     compiler_context_ = compiler_context;
   }
@@ -452,6 +436,10 @@
   // Checks whether the provided method should be compiled, i.e., is in method_to_compile_.
   bool IsMethodToCompile(const MethodReference& method_ref) const;
 
+  // Checks whether profile guided compilation is enabled and whether the method
+  // should be compiled according to the profile file.
+  bool ShouldCompileBasedOnProfile(const MethodReference& method_ref) const;
+
   void RecordClassStatus(ClassReference ref, mirror::Class::Status status)
       REQUIRES(!compiled_classes_lock_);
 
@@ -461,9 +449,6 @@
                                        uint16_t class_def_idx,
                                        const DexFile& dex_file) const;
 
-  // Should the compiler run on this method given profile information?
-  bool SkipCompilation(const std::string& method_name);
-
   // Get memory usage during compilation.
   std::string GetMemoryUsageString(bool extended) const;
 
@@ -482,6 +467,10 @@
     return &compiled_method_storage_;
   }
 
+  // Can we assume that the klass is loaded?
+  bool CanAssumeClassIsLoaded(mirror::Class* klass)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   // Return whether the declaring class of `resolved_member` is
   // available to `referrer_class` for read or write access using two
@@ -516,10 +505,6 @@
   bool CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class, mirror::Class* klass)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Can we assume that the klass is loaded?
-  bool CanAssumeClassIsLoaded(mirror::Class* klass)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics.
   // The only external contract is that unresolved method has flags 0 and resolved non-0.
   enum {
@@ -602,9 +587,6 @@
                       ThreadPool* thread_pool, TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
-  ProfileFile profile_file_;
-  bool profile_present_;
-
   const CompilerOptions* const compiler_options_;
   VerificationResults* const verification_results_;
   DexFileToMethodInlinerMap* const method_inliner_map_;
@@ -654,6 +636,9 @@
   // This option may be restricted to the boot image, depending on a flag in the implementation.
   std::unique_ptr<std::unordered_set<std::string>> methods_to_compile_;
 
+  // Info for profile guided compilation.
+  std::unique_ptr<ProfileCompilationInfo> profile_compilation_info_;
+
   bool had_hard_verifier_failure_;
 
   size_t thread_count_;
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index f8de9fa..82c0e86 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -86,10 +86,7 @@
       mirror::Class* c = class_linker->FindClass(soa.Self(), descriptor, loader);
       CHECK(c != nullptr);
       const auto pointer_size = class_linker->GetImagePointerSize();
-      for (auto& m : c->GetDirectMethods(pointer_size)) {
-        MakeExecutable(&m);
-      }
-      for (auto& m : c->GetVirtualMethods(pointer_size)) {
+      for (auto& m : c->GetMethods(pointer_size)) {
         MakeExecutable(&m);
       }
     }
@@ -145,16 +142,21 @@
   // TODO: check that all Method::GetCode() values are non-null
 }
 
-TEST_F(CompilerDriverTest, AbstractMethodErrorStub) {
+TEST_F(CompilerDriverTest, DISABLED_AbstractMethodErrorStub) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
   TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
   TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   jobject class_loader;
   {
     ScopedObjectAccess soa(Thread::Current());
-    CompileVirtualMethod(NullHandle<mirror::ClassLoader>(), "java.lang.Class", "isFinalizable",
+    CompileVirtualMethod(ScopedNullHandle<mirror::ClassLoader>(),
+                         "java.lang.Class",
+                         "isFinalizable",
                          "()Z");
-    CompileDirectMethod(NullHandle<mirror::ClassLoader>(), "java.lang.Object", "<init>", "()V");
+    CompileDirectMethod(ScopedNullHandle<mirror::ClassLoader>(),
+                        "java.lang.Object",
+                        "<init>",
+                        "()V");
     class_loader = LoadDex("AbstractMethod");
   }
   ASSERT_TRUE(class_loader != nullptr);
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index a24c8a3..8c38cf2 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -34,6 +34,7 @@
       include_patch_information_(kDefaultIncludePatchInformation),
       top_k_profile_threshold_(kDefaultTopKProfileThreshold),
       debuggable_(false),
+      native_debuggable_(kDefaultNativeDebuggable),
       generate_debug_info_(kDefaultGenerateDebugInfo),
       implicit_null_checks_(true),
       implicit_so_checks_(true),
@@ -81,6 +82,7 @@
     include_patch_information_(include_patch_information),
     top_k_profile_threshold_(top_k_profile_threshold),
     debuggable_(debuggable),
+    native_debuggable_(kDefaultNativeDebuggable),
     generate_debug_info_(generate_debug_info),
     implicit_null_checks_(implicit_null_checks),
     implicit_so_checks_(implicit_so_checks),
@@ -117,7 +119,7 @@
 }
 
 void CompilerOptions::ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage) {
-  ParseUintOption(option, "--inline-max-code-units=", &inline_max_code_units_, Usage);
+  ParseUintOption(option, "--inline-max-code-units", &inline_max_code_units_, Usage);
 }
 
 void CompilerOptions::ParseDisablePasses(const StringPiece& option,
@@ -207,6 +209,10 @@
   } else if (option == "--debuggable") {
     debuggable_ = true;
     generate_debug_info_ = true;
+  } else if (option == "--native-debuggable") {
+    native_debuggable_ = true;
+    debuggable_ = true;
+    generate_debug_info_ = true;
   } else if (option.starts_with("--top-k-profile-threshold=")) {
     ParseDouble(option.data(), '=', 0.0, 100.0, &top_k_profile_threshold_, Usage);
   } else if (option == "--include-patch-information") {
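For context on the new flag: per the parsing above, --native-debuggable implies --debuggable, which in turn forces debug info generation. A hedged sketch (the method name ParseCompilerOption, the usage_fn argument, and the accessor names other than GetNativeDebuggable() are assumptions about the surrounding parser):

    CompilerOptions options;
    options.ParseCompilerOption("--native-debuggable", usage_fn);
    // After parsing, all three should hold:
    //   options.GetNativeDebuggable()  == true
    //   options.GetDebuggable()        == true
    //   options.GetGenerateDebugInfo() == true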
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index e6acab4..2b047a2 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -49,6 +49,7 @@
   static const size_t kDefaultTinyMethodThreshold = 20;
   static const size_t kDefaultNumDexMethodsThreshold = 900;
   static constexpr double kDefaultTopKProfileThreshold = 90.0;
+  static const bool kDefaultNativeDebuggable = false;
   static const bool kDefaultGenerateDebugInfo = kIsDebugBuild;
   static const bool kDefaultIncludePatchInformation = false;
   static const size_t kDefaultInlineDepthLimit = 3;
@@ -162,6 +163,10 @@
     return debuggable_;
   }
 
+  bool GetNativeDebuggable() const {
+    return native_debuggable_;
+  }
+
   bool GetGenerateDebugInfo() const {
     return generate_debug_info_;
   }
@@ -240,6 +245,7 @@
   // When using a profile file only the top K% of the profiled samples will be compiled.
   double top_k_profile_threshold_;
   bool debuggable_;
+  bool native_debuggable_;
   bool generate_debug_info_;
   bool implicit_null_checks_;
   bool implicit_so_checks_;
diff --git a/compiler/dwarf/debug_info_entry_writer.h b/compiler/dwarf/debug_info_entry_writer.h
index d9b367b..a551e4b 100644
--- a/compiler/dwarf/debug_info_entry_writer.h
+++ b/compiler/dwarf/debug_info_entry_writer.h
@@ -20,6 +20,7 @@
 #include <cstdint>
 #include <unordered_map>
 
+#include "base/casts.h"
 #include "dwarf/dwarf_constants.h"
 #include "dwarf/writer.h"
 #include "leb128.h"
@@ -47,9 +48,9 @@
  * It also handles generation of abbreviations.
  *
  * Usage:
- *   StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes);
+ *   StartTag(DW_TAG_compile_unit);
  *     WriteStrp(DW_AT_producer, "Compiler name", debug_str);
- *     StartTag(DW_TAG_subprogram, DW_CHILDREN_no);
+ *     StartTag(DW_TAG_subprogram);
  *       WriteStrp(DW_AT_name, "Foo", debug_str);
  *     EndTag();
  *   EndTag();
@@ -59,36 +60,40 @@
   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
 
  public:
+  static constexpr size_t kCompilationUnitHeaderSize = 11;
+
   // Start debugging information entry.
-  void StartTag(Tag tag, Children children) {
-    DCHECK(has_children) << "This tag can not have nested tags";
+  // Returns the offset of the entry in the compilation unit.
+  size_t StartTag(Tag tag) {
     if (inside_entry_) {
       // Write abbrev code for the previous entry.
-      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev());
+      // Parent entry is finalized before any children are written.
+      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev(DW_CHILDREN_yes));
       inside_entry_ = false;
     }
-    StartAbbrev(tag, children);
+    StartAbbrev(tag);
     // Abbrev code placeholder of sufficient size.
     abbrev_code_offset_ = this->data()->size();
     this->PushUleb128(NextAbbrevCode());
     depth_++;
     inside_entry_ = true;
-    has_children = (children == DW_CHILDREN_yes);
+    return abbrev_code_offset_ + kCompilationUnitHeaderSize;
   }
 
   // End debugging information entry.
   void EndTag() {
     DCHECK_GT(depth_, 0);
     if (inside_entry_) {
-      // Write abbrev code for this tag.
-      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev());
+      // Write abbrev code for this entry.
+      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev(DW_CHILDREN_no));
       inside_entry_ = false;
-    }
-    if (has_children) {
-      this->PushUint8(0);  // End of children.
+      // This entry has no children and so there is no terminator.
+    } else {
+      // The entry has already been finalized, so it must be a parent entry
+      // and we need to write the terminator required by DW_CHILDREN_yes.
+      this->PushUint8(0);
     }
     depth_--;
-    has_children = true;  // Parent tag obviously has children.
   }
 
   void WriteAddr(Attribute attrib, uint64_t value) {
@@ -101,10 +106,16 @@
     }
   }
 
-  void WriteBlock(Attribute attrib, const void* ptr, int size) {
+  void WriteBlock(Attribute attrib, const void* ptr, size_t num_bytes) {
     AddAbbrevAttribute(attrib, DW_FORM_block);
-    this->PushUleb128(size);
-    this->PushData(ptr, size);
+    this->PushUleb128(num_bytes);
+    this->PushData(ptr, num_bytes);
+  }
+
+  void WriteExprLoc(Attribute attrib, const void* ptr, size_t num_bytes) {
+    AddAbbrevAttribute(attrib, DW_FORM_exprloc);
+    this->PushUleb128(dchecked_integral_cast<uint32_t>(num_bytes));
+    this->PushData(ptr, num_bytes);
   }
 
   void WriteData1(Attribute attrib, uint8_t value) {
@@ -127,6 +138,11 @@
     this->PushUint64(value);
   }
 
+  void WriteSecOffset(Attribute attrib, uint32_t offset) {
+    AddAbbrevAttribute(attrib, DW_FORM_sec_offset);
+    this->PushUint32(offset);
+  }
+
   void WriteSdata(Attribute attrib, int value) {
     AddAbbrevAttribute(attrib, DW_FORM_sdata);
     this->PushSleb128(value);
@@ -147,12 +163,12 @@
     this->PushUint8(value ? 1 : 0);
   }
 
-  void WriteRef4(Attribute attrib, int cu_offset) {
+  void WriteRef4(Attribute attrib, uint32_t cu_offset) {
     AddAbbrevAttribute(attrib, DW_FORM_ref4);
     this->PushUint32(cu_offset);
   }
 
-  void WriteRef(Attribute attrib, int cu_offset) {
+  void WriteRef(Attribute attrib, uint32_t cu_offset) {
     AddAbbrevAttribute(attrib, DW_FORM_ref_udata);
     this->PushUleb128(cu_offset);
   }
@@ -162,16 +178,21 @@
     this->PushString(value);
   }
 
-  void WriteStrp(Attribute attrib, int address) {
+  void WriteStrp(Attribute attrib, size_t debug_str_offset) {
     AddAbbrevAttribute(attrib, DW_FORM_strp);
-    this->PushUint32(address);
+    this->PushUint32(dchecked_integral_cast<uint32_t>(debug_str_offset));
   }
 
-  void WriteStrp(Attribute attrib, const char* value, std::vector<uint8_t>* debug_str) {
+  void WriteStrp(Attribute attrib, const char* str, size_t len,
+                 std::vector<uint8_t>* debug_str) {
     AddAbbrevAttribute(attrib, DW_FORM_strp);
-    int address = debug_str->size();
-    debug_str->insert(debug_str->end(), value, value + strlen(value) + 1);
-    this->PushUint32(address);
+    this->PushUint32(debug_str->size());
+    debug_str->insert(debug_str->end(), str, str + len);
+    debug_str->push_back(0);
+  }
+
+  void WriteStrp(Attribute attrib, const char* str, std::vector<uint8_t>* debug_str) {
+    WriteStrp(attrib, str, strlen(str), debug_str);
   }
 
   bool Is64bit() const { return is64bit_; }
@@ -180,7 +201,11 @@
     return patch_locations_;
   }
 
+  int Depth() const { return depth_; }
+
   using Writer<Vector>::data;
+  using Writer<Vector>::size;
+  using Writer<Vector>::UpdateUint32;
 
   DebugInfoEntryWriter(bool is64bitArch,
                        Vector* debug_abbrev,
@@ -196,16 +221,17 @@
   }
 
   ~DebugInfoEntryWriter() {
+    DCHECK(!inside_entry_);
     DCHECK_EQ(depth_, 0);
   }
 
  private:
   // Start abbreviation declaration.
-  void StartAbbrev(Tag tag, Children children) {
-    DCHECK(!inside_entry_);
+  void StartAbbrev(Tag tag) {
     current_abbrev_.clear();
     EncodeUnsignedLeb128(&current_abbrev_, tag);
-    current_abbrev_.push_back(children);
+    has_children_offset_ = current_abbrev_.size();
+    current_abbrev_.push_back(0);  // Place-holder for DW_CHILDREN.
   }
 
   // Add attribute specification.
@@ -220,8 +246,9 @@
   }
 
   // End abbreviation declaration and return its code.
-  int EndAbbrev() {
-    DCHECK(inside_entry_);
+  int EndAbbrev(Children has_children) {
+    DCHECK(!current_abbrev_.empty());
+    current_abbrev_[has_children_offset_] = has_children;
     auto it = abbrev_codes_.insert(std::make_pair(std::move(current_abbrev_),
                                                   NextAbbrevCode()));
     int abbrev_code = it.first->second;
@@ -241,6 +268,7 @@
   // Fields for writing and deduplication of abbrevs.
   Writer<Vector> debug_abbrev_;
   Vector current_abbrev_;
+  size_t has_children_offset_ = 0;
   std::unordered_map<Vector, int,
                      FNVHash<Vector> > abbrev_codes_;
 
@@ -250,7 +278,6 @@
   int depth_ = 0;
   size_t abbrev_code_offset_ = 0;  // Location to patch once we know the code.
   bool inside_entry_ = false;  // Entry ends at first child (if any).
-  bool has_children = true;
   std::vector<uintptr_t> patch_locations_;
 };
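Usage sketch for the reworked writer API above (StartTag() no longer takes a Children argument; the DW_CHILDREN flag is patched into the abbreviation lazily, as the diff shows):

    DebugInfoEntryWriter<> info(/* is64bit */ false, &debug_abbrev);
    info.StartTag(DW_TAG_compile_unit);  // Returns the entry's offset in the CU.
    info.StartTag(DW_TAG_subprogram);    // Finalizes the parent abbrev as DW_CHILDREN_yes.
    info.EndTag();                       // Leaf entry: DW_CHILDREN_no, no terminator byte.
    info.EndTag();                       // Parent already finalized; writes the 0 terminator.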
 
diff --git a/compiler/dwarf/dedup_vector.h b/compiler/dwarf/dedup_vector.h
new file mode 100644
index 0000000..7fb21b7
--- /dev/null
+++ b/compiler/dwarf/dedup_vector.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_DEDUP_VECTOR_H_
+#define ART_COMPILER_DWARF_DEDUP_VECTOR_H_
+
+#include <vector>
+#include <unordered_map>
+
+namespace art {
+namespace dwarf {
+  class DedupVector {
+   public:
+    // Returns the offset of a previously inserted identical block of data,
+    // or appends the data at the end of the vector and returns its offset.
+    size_t Insert(const uint8_t* ptr, size_t num_bytes) {
+      // See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+      uint32_t hash = 2166136261u;
+      for (size_t i = 0; i < num_bytes; i++) {
+        hash = (hash ^ ptr[i]) * 16777619u;
+      }
+      // Try to find existing copy of the data.
+      const auto& range = hash_to_offset_.equal_range(hash);
+      for (auto it = range.first; it != range.second; ++it) {
+        const size_t offset = it->second;
+        if (offset + num_bytes <= vector_.size() &&
+            memcmp(vector_.data() + offset, ptr, num_bytes) == 0) {
+          return offset;
+        }
+      }
+      // Append the data at the end of the vector.
+      const size_t new_offset = vector_.size();
+      hash_to_offset_.emplace(hash, new_offset);
+      vector_.insert(vector_.end(), ptr, ptr + num_bytes);
+      return new_offset;
+    }
+
+    const std::vector<uint8_t>& Data() const { return vector_; }
+
+   private:
+    struct IdentityHash {
+      size_t operator()(uint32_t v) const { return v; }
+    };
+
+    // We store the full hash as the key to simplify growing of the table.
+    // It avoids storing or referencing the actual data in the hash-table.
+    std::unordered_multimap<uint32_t, size_t, IdentityHash> hash_to_offset_;
+
+    std::vector<uint8_t> vector_;
+  };
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DWARF_DEDUP_VECTOR_H_
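Brief usage sketch for DedupVector above: inserting identical bytes twice returns the same offset and stores the data only once.

    dwarf::DedupVector dedup;
    const uint8_t bytes[] = {0x11, 0x01, 0x00};
    size_t first = dedup.Insert(bytes, sizeof(bytes));  // Appends; returns offset 0.
    size_t again = dedup.Insert(bytes, sizeof(bytes));  // Matched by hash + memcmp; returns 0.
    CHECK_EQ(first, again);
    CHECK_EQ(dedup.Data().size(), sizeof(bytes));       // Stored only once.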
diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc
index 6bb22ed..e9cd421 100644
--- a/compiler/dwarf/dwarf_test.cc
+++ b/compiler/dwarf/dwarf_test.cc
@@ -285,7 +285,7 @@
   constexpr bool is64bit = false;
   DebugInfoEntryWriter<> info(is64bit, &debug_abbrev_data_);
   DW_CHECK("Contents of the .debug_info section:");
-  info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes);
+  info.StartTag(dwarf::DW_TAG_compile_unit);
   DW_CHECK("Abbrev Number: 1 (DW_TAG_compile_unit)");
   info.WriteStrp(dwarf::DW_AT_producer, "Compiler name", &debug_str_data_);
   DW_CHECK_NEXT("DW_AT_producer    : (indirect string, offset: 0x0): Compiler name");
@@ -293,7 +293,7 @@
   DW_CHECK_NEXT("DW_AT_low_pc      : 0x1000000");
   info.WriteAddr(dwarf::DW_AT_high_pc, 0x02000000);
   DW_CHECK_NEXT("DW_AT_high_pc     : 0x2000000");
-  info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);
+  info.StartTag(dwarf::DW_TAG_subprogram);
   DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)");
   info.WriteStrp(dwarf::DW_AT_name, "Foo", &debug_str_data_);
   DW_CHECK_NEXT("DW_AT_name        : (indirect string, offset: 0xe): Foo");
@@ -302,7 +302,7 @@
   info.WriteAddr(dwarf::DW_AT_high_pc, 0x01020000);
   DW_CHECK_NEXT("DW_AT_high_pc     : 0x1020000");
   info.EndTag();  // DW_TAG_subprogram
-  info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);
+  info.StartTag(dwarf::DW_TAG_subprogram);
   DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)");
   info.WriteStrp(dwarf::DW_AT_name, "Bar", &debug_str_data_);
   DW_CHECK_NEXT("DW_AT_name        : (indirect string, offset: 0x12): Bar");
@@ -313,7 +313,7 @@
   info.EndTag();  // DW_TAG_subprogram
   info.EndTag();  // DW_TAG_compile_unit
   // Test that previous list was properly terminated and empty children.
-  info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes);
+  info.StartTag(dwarf::DW_TAG_compile_unit);
   info.EndTag();  // DW_TAG_compile_unit
 
   // The abbrev table is just side product, but check it as well.
@@ -327,7 +327,7 @@
   DW_CHECK_NEXT("DW_AT_name         DW_FORM_strp");
   DW_CHECK_NEXT("DW_AT_low_pc       DW_FORM_addr");
   DW_CHECK_NEXT("DW_AT_high_pc      DW_FORM_addr");
-  DW_CHECK("3      DW_TAG_compile_unit    [has children]");
+  DW_CHECK("3      DW_TAG_compile_unit    [no children]");
 
   std::vector<uintptr_t> debug_info_patches;
   std::vector<uintptr_t> expected_patches { 16, 20, 29, 33, 42, 46 };  // NOLINT
diff --git a/compiler/dwarf/dwarf_test.h b/compiler/dwarf/dwarf_test.h
index 5464ed9..c3a3ca9 100644
--- a/compiler/dwarf/dwarf_test.h
+++ b/compiler/dwarf/dwarf_test.h
@@ -29,6 +29,7 @@
 #include "common_runtime_test.h"
 #include "elf_builder.h"
 #include "gtest/gtest.h"
+#include "linker/file_output_stream.h"
 #include "os.h"
 
 namespace art {
diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h
index 633e2f7..f76f76f 100644
--- a/compiler/dwarf/headers.h
+++ b/compiler/dwarf/headers.h
@@ -134,10 +134,11 @@
   Writer<> writer(debug_info);
   size_t start = writer.data()->size();
   writer.PushUint32(0);  // Length placeholder.
-  writer.PushUint16(3);  // Version.
+  writer.PushUint16(4);  // Version.
   writer.PushUint32(debug_abbrev_offset);
   writer.PushUint8(entries.Is64bit() ? 8 : 4);
   size_t entries_offset = writer.data()->size();
+  DCHECK_EQ(entries_offset, DebugInfoEntryWriter<Vector>::kCompilationUnitHeaderSize);
   writer.PushData(*entries.data());
   writer.UpdateUint32(start, writer.data()->size() - start - 4);
   // Copy patch locations and make them relative to .debug_info section.
@@ -166,10 +167,7 @@
   Writer<> writer(debug_line);
   size_t header_start = writer.data()->size();
   writer.PushUint32(0);  // Section-length placeholder.
-  // Claim DWARF-2 version even though we use some DWARF-3 features.
-  // DWARF-2 consumers will ignore the unknown opcodes.
-  // This is what clang currently does.
-  writer.PushUint16(2);  // .debug_line version.
+  writer.PushUint16(3);  // .debug_line version.
   size_t header_length_pos = writer.data()->size();
   writer.PushUint32(0);  // Header-length placeholder.
   writer.PushUint8(1 << opcodes.GetCodeFactorBits());
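Worked breakdown of the 32-bit DWARF compilation-unit header written in the first hunk above; this is where kCompilationUnitHeaderSize = 11 in debug_info_entry_writer.h comes from:

    // unit_length         : 4 bytes  (PushUint32 length placeholder)
    // version             : 2 bytes  (PushUint16(4))
    // debug_abbrev_offset : 4 bytes  (PushUint32(debug_abbrev_offset))
    // address_size        : 1 byte   (PushUint8(4 or 8))
    // total               : 11 bytes, matching the DCHECK_EQ above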
diff --git a/compiler/dwarf/method_debug_info.h b/compiler/dwarf/method_debug_info.h
new file mode 100644
index 0000000..a391e4d
--- /dev/null
+++ b/compiler/dwarf/method_debug_info.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_METHOD_DEBUG_INFO_H_
+#define ART_COMPILER_DWARF_METHOD_DEBUG_INFO_H_
+
+#include "dex_file.h"
+
+namespace art {
+class CompiledMethod;
+namespace dwarf {
+
+struct MethodDebugInfo {
+  const DexFile* dex_file_;
+  size_t class_def_index_;
+  uint32_t dex_method_index_;
+  uint32_t access_flags_;
+  const DexFile::CodeItem* code_item_;
+  bool deduped_;
+  uint32_t low_pc_;
+  uint32_t high_pc_;
+  CompiledMethod* compiled_method_;
+};
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DWARF_METHOD_DEBUG_INFO_H_
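Hedged sketch of populating the struct above (field names come from MethodDebugInfo; the values and surrounding variables are illustrative only):

    dwarf::MethodDebugInfo info;
    info.dex_file_ = &dex_file;
    info.class_def_index_ = class_def_index;
    info.dex_method_index_ = method_idx;
    info.access_flags_ = access_flags;
    info.code_item_ = code_item;
    info.deduped_ = false;                    // Only one FDE per unique code address.
    info.low_pc_ = code_offset;               // Start of the compiled code.
    info.high_pc_ = code_offset + code_size;  // End of the compiled code.
    info.compiled_method_ = compiled_method;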
diff --git a/compiler/dwarf/register.h b/compiler/dwarf/register.h
index 7045237..b67e8dd 100644
--- a/compiler/dwarf/register.h
+++ b/compiler/dwarf/register.h
@@ -35,9 +35,10 @@
   //   Arm64 mapping is correct since we already do this there.
   //   libunwind might struggle with the new mapping as well.
 
-  static Reg ArmCore(int num) { return Reg(num); }
+  static Reg ArmCore(int num) { return Reg(num); }  // R0-R15.
   static Reg ArmFp(int num) { return Reg(64 + num); }  // S0-S31.
-  static Reg Arm64Core(int num) { return Reg(num); }
+  static Reg ArmDp(int num) { return Reg(256 + num); }  // D0-D31.
+  static Reg Arm64Core(int num) { return Reg(num); }  // X0-X31.
   static Reg Arm64Fp(int num) { return Reg(64 + num); }  // V0-V31.
   static Reg MipsCore(int num) { return Reg(num); }
   static Reg Mips64Core(int num) { return Reg(num); }
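Illustration of the numbering scheme above; the values follow directly from the constructors:

    Reg::ArmCore(0).num();  // 0   -> R0
    Reg::ArmFp(1).num();    // 65  -> S1
    Reg::ArmDp(2).num();    // 258 -> D2 (e.g. the S4+S5 pair maps to D2)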
diff --git a/compiler/dwarf/writer.h b/compiler/dwarf/writer.h
index 00b9dfa..d2add7f 100644
--- a/compiler/dwarf/writer.h
+++ b/compiler/dwarf/writer.h
@@ -114,9 +114,9 @@
     data_->insert(data_->end(), value, value + strlen(value) + 1);
   }
 
-  void PushData(const void* ptr, size_t size) {
+  void PushData(const void* ptr, size_t num_bytes) {
     const char* p = reinterpret_cast<const char*>(ptr);
-    data_->insert(data_->end(), p, p + size);
+    data_->insert(data_->end(), p, p + num_bytes);
   }
 
   template<typename Vector2>
@@ -164,6 +164,10 @@
     return data_;
   }
 
+  size_t size() const {
+    return data_->size();
+  }
+
   explicit Writer(Vector* buffer) : data_(buffer) { }
 
  private:
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 6e8dfd6..bb07cc2 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -23,10 +23,10 @@
 #include "base/bit_utils.h"
 #include "base/casts.h"
 #include "base/unix_file/fd_file.h"
-#include "buffered_output_stream.h"
 #include "elf_utils.h"
-#include "file_output_stream.h"
 #include "leb128.h"
+#include "linker/error_delaying_output_stream.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
@@ -100,7 +100,7 @@
       header_.sh_entsize = entsize;
     }
 
-    virtual ~Section() {
+    ~Section() OVERRIDE {
       if (started_) {
         CHECK(finished_);
       }
@@ -120,8 +120,8 @@
       sections.push_back(this);
       // Align file position.
       if (header_.sh_type != SHT_NOBITS) {
-        header_.sh_offset = RoundUp(owner_->Seek(0, kSeekCurrent), header_.sh_addralign);
-        owner_->Seek(header_.sh_offset, kSeekSet);
+        header_.sh_offset = RoundUp(owner_->stream_.Seek(0, kSeekCurrent), header_.sh_addralign);
+        owner_->stream_.Seek(header_.sh_offset, kSeekSet);
       }
       // Align virtual memory address.
       if ((header_.sh_flags & SHF_ALLOC) != 0) {
@@ -139,7 +139,7 @@
         CHECK_GT(header_.sh_size, 0u);
       } else {
         // Use the current file position to determine section size.
-        off_t file_offset = owner_->Seek(0, kSeekCurrent);
+        off_t file_offset = owner_->stream_.Seek(0, kSeekCurrent);
         CHECK_GE(file_offset, (off_t)header_.sh_offset);
         header_.sh_size = file_offset - header_.sh_offset;
       }
@@ -161,7 +161,7 @@
       } else {
         CHECK(started_);
         CHECK_NE(header_.sh_type, (Elf_Word)SHT_NOBITS);
-        return owner_->Seek(0, kSeekCurrent) - header_.sh_offset;
+        return owner_->stream_.Seek(0, kSeekCurrent) - header_.sh_offset;
       }
     }
 
@@ -176,15 +176,20 @@
     bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
       CHECK(started_);
       CHECK(!finished_);
-      owner_->WriteFully(buffer, byte_count);
-      return true;
+      return owner_->stream_.WriteFully(buffer, byte_count);
     }
 
     // This function always succeeds to simplify code.
     // Use builder's Good() to check the actual status.
     off_t Seek(off_t offset, Whence whence) OVERRIDE {
       // Forward the seek as-is and trust the caller to use it reasonably.
-      return owner_->Seek(offset, whence);
+      return owner_->stream_.Seek(offset, whence);
+    }
+
+    // This function flushes the output and returns whether it succeeded.
+    // If there was a previous failure, this does nothing and returns false, i.e. failed.
+    bool Flush() OVERRIDE {
+      return owner_->stream_.Flush();
     }
 
     Elf_Word GetSectionIndex() const {
@@ -270,26 +275,24 @@
   };
 
   ElfBuilder(InstructionSet isa, OutputStream* output)
-    : isa_(isa),
-      output_(output),
-      output_good_(true),
-      output_offset_(0),
-      rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
-      text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0),
-      bss_(this, ".bss", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
-      dynstr_(this, ".dynstr", SHF_ALLOC, kPageSize),
-      dynsym_(this, ".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
-      hash_(this, ".hash", SHT_HASH, SHF_ALLOC, &dynsym_, 0, sizeof(Elf_Word), sizeof(Elf_Word)),
-      dynamic_(this, ".dynamic", SHT_DYNAMIC, SHF_ALLOC, &dynstr_, 0, kPageSize, sizeof(Elf_Dyn)),
-      eh_frame_(this, ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
-      eh_frame_hdr_(this, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0),
-      strtab_(this, ".strtab", 0, kPageSize),
-      symtab_(this, ".symtab", SHT_SYMTAB, 0, &strtab_),
-      debug_frame_(this, ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, sizeof(Elf_Addr), 0),
-      debug_info_(this, ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
-      debug_line_(this, ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
-      shstrtab_(this, ".shstrtab", 0, 1),
-      virtual_address_(0) {
+      : isa_(isa),
+        stream_(output),
+        rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0),
+        bss_(this, ".bss", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        dynstr_(this, ".dynstr", SHF_ALLOC, kPageSize),
+        dynsym_(this, ".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
+        hash_(this, ".hash", SHT_HASH, SHF_ALLOC, &dynsym_, 0, sizeof(Elf_Word), sizeof(Elf_Word)),
+        dynamic_(this, ".dynamic", SHT_DYNAMIC, SHF_ALLOC, &dynstr_, 0, kPageSize, sizeof(Elf_Dyn)),
+        eh_frame_(this, ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        eh_frame_hdr_(this, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0),
+        strtab_(this, ".strtab", 0, kPageSize),
+        symtab_(this, ".symtab", SHT_SYMTAB, 0, &strtab_),
+        debug_frame_(this, ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, sizeof(Elf_Addr), 0),
+        debug_info_(this, ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
+        debug_line_(this, ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
+        shstrtab_(this, ".shstrtab", 0, 1),
+        virtual_address_(0) {
     text_.phdr_flags_ = PF_R | PF_X;
     bss_.phdr_flags_ = PF_R | PF_W;
     dynamic_.phdr_flags_ = PF_R | PF_W;
@@ -312,7 +315,7 @@
 
   // Encode patch locations as LEB128 list of deltas between consecutive addresses.
   // (exposed publicly for tests)
-  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
+  static void EncodeOatPatches(const ArrayRef<const uintptr_t>& locations,
                                std::vector<uint8_t>* buffer) {
     buffer->reserve(buffer->size() + locations.size() * 2);  // guess 2 bytes per ULEB128.
     uintptr_t address = 0;  // relative to start of section.
@@ -323,9 +326,9 @@
     }
   }
 
-  void WritePatches(const char* name, const std::vector<uintptr_t>* patch_locations) {
+  void WritePatches(const char* name, const ArrayRef<const uintptr_t>& patch_locations) {
     std::vector<uint8_t> buffer;
-    EncodeOatPatches(*patch_locations, &buffer);
+    EncodeOatPatches(patch_locations, &buffer);
     std::unique_ptr<Section> s(new Section(this, name, SHT_OAT_PATCH, 0, nullptr, 0, 1, 0));
     s->Start();
     s->WriteFully(buffer.data(), buffer.size());
@@ -346,7 +349,7 @@
     // We do not know the number of headers until later, so
     // it is easiest to just reserve a fixed amount of space.
     int size = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * kMaxProgramHeaders;
-    Seek(size, kSeekSet);
+    stream_.Seek(size, kSeekSet);
     virtual_address_ += size;
   }
 
@@ -370,9 +373,14 @@
       shdrs.push_back(section->header_);
     }
     Elf_Off section_headers_offset;
-    section_headers_offset = RoundUp(Seek(0, kSeekCurrent), sizeof(Elf_Off));
-    Seek(section_headers_offset, kSeekSet);
-    WriteFully(shdrs.data(), shdrs.size() * sizeof(shdrs[0]));
+    section_headers_offset = RoundUp(stream_.Seek(0, kSeekCurrent), sizeof(Elf_Off));
+    stream_.Seek(section_headers_offset, kSeekSet);
+    stream_.WriteFully(shdrs.data(), shdrs.size() * sizeof(shdrs[0]));
+
+    // Flush everything else before writing the program headers. This should prevent
+    // the OS from reordering writes, so that we don't end up with valid headers
+    // and partially written data if we suddenly lose power, for example.
+    stream_.Flush();
 
     // Write the initial file headers.
     std::vector<Elf_Phdr> phdrs = MakeProgramHeaders();
@@ -382,9 +390,10 @@
     elf_header.e_phnum = phdrs.size();
     elf_header.e_shnum = shdrs.size();
     elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
-    Seek(0, kSeekSet);
-    WriteFully(&elf_header, sizeof(elf_header));
-    WriteFully(phdrs.data(), phdrs.size() * sizeof(phdrs[0]));
+    stream_.Seek(0, kSeekSet);
+    stream_.WriteFully(&elf_header, sizeof(elf_header));
+    stream_.WriteFully(phdrs.data(), phdrs.size() * sizeof(phdrs[0]));
+    stream_.Flush();
   }
 
   // The running program does not have access to section headers
@@ -462,53 +471,15 @@
 
   // Returns true if all writes and seeks on the output stream succeeded.
   bool Good() {
-    return output_good_;
+    return stream_.Good();
+  }
+
+  // Returns the builder's internal stream.
+  OutputStream* GetStream() {
+    return &stream_;
   }
 
  private:
-  // This function always succeeds to simplify code.
-  // Use Good() to check the actual status of the output stream.
-  void WriteFully(const void* buffer, size_t byte_count) {
-    if (output_good_) {
-      if (!output_->WriteFully(buffer, byte_count)) {
-        PLOG(ERROR) << "Failed to write " << byte_count
-                    << " bytes to ELF file at offset " << output_offset_;
-        output_good_ = false;
-      }
-    }
-    output_offset_ += byte_count;
-  }
-
-  // This function always succeeds to simplify code.
-  // Use Good() to check the actual status of the output stream.
-  off_t Seek(off_t offset, Whence whence) {
-    // We keep shadow copy of the offset so that we return
-    // the expected value even if the output stream failed.
-    off_t new_offset;
-    switch (whence) {
-      case kSeekSet:
-        new_offset = offset;
-        break;
-      case kSeekCurrent:
-        new_offset = output_offset_ + offset;
-        break;
-      default:
-        LOG(FATAL) << "Unsupported seek type: " << whence;
-        UNREACHABLE();
-    }
-    if (output_good_) {
-      off_t actual_offset = output_->Seek(offset, whence);
-      if (actual_offset == (off_t)-1) {
-        PLOG(ERROR) << "Failed to seek in ELF file. Offset=" << offset
-                    << " whence=" << whence << " new_offset=" << new_offset;
-        output_good_ = false;
-      }
-      DCHECK_EQ(actual_offset, new_offset);
-    }
-    output_offset_ = new_offset;
-    return new_offset;
-  }
-
   static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
     Elf_Ehdr elf_header = Elf_Ehdr();
     switch (isa) {
@@ -660,9 +631,7 @@
 
   InstructionSet isa_;
 
-  OutputStream* output_;
-  bool output_good_;  // True if all writes to output succeeded.
-  off_t output_offset_;  // Keep track of the current position in the stream.
+  ErrorDelayingOutputStream stream_;
 
   Section rodata_;
   Section text_;
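Sketch of the error-handling contract after this refactoring: writes and seeks are funneled through the error-delaying stream, so call sites no longer check each operation, only the final Good(). (ElfTypes32 and the output_stream variable are assumptions for illustration.)

    ElfBuilder<ElfTypes32> builder(kThumb2, output_stream);
    // ... start sections and WriteFully() data; failures are remembered, not surfaced ...
    if (!builder.Good()) {
      LOG(ERROR) << "One of the earlier writes or seeks to the ELF file failed.";
    }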
diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h
index 03f8ceb..c5a0fd5 100644
--- a/compiler/elf_writer.h
+++ b/compiler/elf_writer.h
@@ -25,13 +25,16 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "os.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
-class CompilerDriver;
-class DexFile;
 class ElfFile;
-class OatWriter;
+class OutputStream;
+
+namespace dwarf {
+struct MethodDebugInfo;
+}  // namespace dwarf
 
 class ElfWriter {
  public:
@@ -46,21 +49,26 @@
 
   static bool Fixup(File* file, uintptr_t oat_data_begin);
 
- protected:
-  ElfWriter(const CompilerDriver& driver, File* elf_file)
-    : compiler_driver_(&driver), elf_file_(elf_file) {
-  }
-
   virtual ~ElfWriter() {}
 
-  virtual bool Write(OatWriter* oat_writer,
-                     const std::vector<const DexFile*>& dex_files,
-                     const std::string& android_root,
-                     bool is_host)
-      SHARED_REQUIRES(Locks::mutator_lock_) = 0;
+  virtual void Start() = 0;
+  virtual OutputStream* StartRoData() = 0;
+  virtual void EndRoData(OutputStream* rodata) = 0;
+  virtual OutputStream* StartText() = 0;
+  virtual void EndText(OutputStream* text) = 0;
+  virtual void SetBssSize(size_t bss_size) = 0;
+  virtual void WriteDynamicSection() = 0;
+  virtual void WriteDebugInfo(const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) = 0;
+  virtual void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) = 0;
+  virtual bool End() = 0;
 
-  const CompilerDriver* const compiler_driver_;
-  File* const elf_file_;
+  // Get the ELF writer's stream. This stream can be used for writing data directly
+  // to a section after the section has been finished. When that's done, the user
+  // should Seek() back to the position where the stream was before this operation.
+  virtual OutputStream* GetStream() = 0;
+
+ protected:
+  ElfWriter() = default;
 };
 
 }  // namespace art
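Hypothetical call sequence for the new streaming ElfWriter interface declared above (the order is inferred from the method names; error handling is elided):

    writer->Start();
    OutputStream* rodata = writer->StartRoData();
    // ... write .rodata contents ...
    writer->EndRoData(rodata);
    OutputStream* text = writer->StartText();
    // ... write .text contents ...
    writer->EndText(text);
    writer->SetBssSize(bss_size);
    writer->WriteDynamicSection();
    writer->WriteDebugInfo(method_infos);
    writer->WritePatchLocations(patch_locations);
    bool success = writer->End();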
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index e1ab340..2bc8c89 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -17,20 +17,72 @@
 #include "elf_writer_debug.h"
 
 #include <unordered_set>
+#include <vector>
 
 #include "base/casts.h"
+#include "base/stl_util.h"
 #include "compiled_method.h"
 #include "driver/compiler_driver.h"
 #include "dex_file-inl.h"
+#include "dwarf/dedup_vector.h"
 #include "dwarf/headers.h"
+#include "dwarf/method_debug_info.h"
 #include "dwarf/register.h"
 #include "elf_builder.h"
 #include "oat_writer.h"
 #include "utils.h"
+#include "stack_map.h"
 
 namespace art {
 namespace dwarf {
 
+// The ARM specification defines three special mapping symbols
+// $a, $t and $d which mark ARM, Thumb and data ranges respectively.
+// These symbols can be used by tools, for example, to pretty
+// print instructions correctly.  Objdump will use them if they
+// exist, but it will still work well without them.
+// However, these extra symbols take space, so let's just generate
+// one symbol which marks the whole .text section as code.
+constexpr bool kGenerateSingleArmMappingSymbol = true;
+
+static Reg GetDwarfCoreReg(InstructionSet isa, int machine_reg) {
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+      return Reg::ArmCore(machine_reg);
+    case kArm64:
+      return Reg::Arm64Core(machine_reg);
+    case kX86:
+      return Reg::X86Core(machine_reg);
+    case kX86_64:
+      return Reg::X86_64Core(machine_reg);
+    case kMips:
+      return Reg::MipsCore(machine_reg);
+    case kMips64:
+      return Reg::Mips64Core(machine_reg);
+    default:
+      LOG(FATAL) << "Unknown instruction set: " << isa;
+      UNREACHABLE();
+  }
+}
+
+static Reg GetDwarfFpReg(InstructionSet isa, int machine_reg) {
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+      return Reg::ArmFp(machine_reg);
+    case kArm64:
+      return Reg::Arm64Fp(machine_reg);
+    case kX86:
+      return Reg::X86Fp(machine_reg);
+    case kX86_64:
+      return Reg::X86_64Fp(machine_reg);
+    default:
+      LOG(FATAL) << "Unknown instruction set: " << isa;
+      UNREACHABLE();
+  }
+}
+
 static void WriteCIE(InstructionSet isa,
                      CFIFormat format,
                      std::vector<uint8_t>* buffer) {
@@ -162,10 +214,9 @@
 
 template<typename ElfTypes>
 void WriteCFISection(ElfBuilder<ElfTypes>* builder,
-                     const std::vector<OatWriter::DebugInfo>& method_infos,
+                     const ArrayRef<const MethodDebugInfo>& method_infos,
                      CFIFormat format) {
-  CHECK(format == dwarf::DW_DEBUG_FRAME_FORMAT ||
-        format == dwarf::DW_EH_FRAME_FORMAT);
+  CHECK(format == DW_DEBUG_FRAME_FORMAT || format == DW_EH_FRAME_FORMAT);
   typedef typename ElfTypes::Addr Elf_Addr;
 
   std::vector<uint32_t> binary_search_table;
@@ -177,7 +228,7 @@
   }
 
   // Write .eh_frame/.debug_frame section.
-  auto* cfi_section = (format == dwarf::DW_DEBUG_FRAME_FORMAT
+  auto* cfi_section = (format == DW_DEBUG_FRAME_FORMAT
                        ? builder->GetDebugFrame()
                        : builder->GetEhFrame());
   {
@@ -192,7 +243,7 @@
     cfi_section->WriteFully(buffer.data(), buffer.size());
     buffer_address += buffer.size();
     buffer.clear();
-    for (const OatWriter::DebugInfo& mi : method_infos) {
+    for (const MethodDebugInfo& mi : method_infos) {
       if (!mi.deduped_) {  // Only one FDE per unique address.
         ArrayRef<const uint8_t> opcodes = mi.compiled_method_->GetCFIInfo();
         if (!opcodes.empty()) {
@@ -245,14 +296,580 @@
     header_section->WriteFully(binary_search_table.data(), binary_search_table.size());
     header_section->End();
   } else {
-    builder->WritePatches(".debug_frame.oat_patches", &patch_locations);
+    builder->WritePatches(".debug_frame.oat_patches",
+                          ArrayRef<const uintptr_t>(patch_locations));
   }
 }
 
+namespace {
+  struct CompilationUnit {
+    std::vector<const MethodDebugInfo*> methods_;
+    size_t debug_line_offset_ = 0;
+    uint32_t low_pc_ = 0xFFFFFFFFU;
+    uint32_t high_pc_ = 0;
+  };
+
+  typedef std::vector<DexFile::LocalInfo> LocalInfos;
+
+  void LocalInfoCallback(void* ctx, const DexFile::LocalInfo& entry) {
+    static_cast<LocalInfos*>(ctx)->push_back(entry);
+  }
+
+  typedef std::vector<DexFile::PositionInfo> PositionInfos;
+
+  bool PositionInfoCallback(void* ctx, const DexFile::PositionInfo& entry) {
+    static_cast<PositionInfos*>(ctx)->push_back(entry);
+    return false;
+  }
+
+  std::vector<const char*> GetParamNames(const MethodDebugInfo* mi) {
+    std::vector<const char*> names;
+    if (mi->code_item_ != nullptr) {
+      const uint8_t* stream = mi->dex_file_->GetDebugInfoStream(mi->code_item_);
+      if (stream != nullptr) {
+        DecodeUnsignedLeb128(&stream);  // line.
+        uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
+        for (uint32_t i = 0; i < parameters_size; ++i) {
+          uint32_t id = DecodeUnsignedLeb128P1(&stream);
+          names.push_back(mi->dex_file_->StringDataByIdx(id));
+        }
+      }
+    }
+    return names;
+  }
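+  // Worked example for the decoding above (values illustrative): for a method
+  // "void foo(int a, long b)" the debug info stream starts with ULEB128
+  // line_start, then ULEB128 parameters_size == 2, then two ULEB128P1 string
+  // indices naming "a" and "b"; an index of NO_INDEX means the name is absent.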
+
+  struct VariableLocation {
+    uint32_t low_pc;
+    uint32_t high_pc;
+    DexRegisterLocation reg_lo;  // May be None if the location is unknown.
+    DexRegisterLocation reg_hi;  // Most significant bits of 64-bit value.
+  };
+
+  // Get the location of given dex register (e.g. stack or machine register).
+  // Note that the location might be different based on the current pc.
+  // The result will cover all ranges where the variable is in scope.
+  std::vector<VariableLocation> GetVariableLocations(const MethodDebugInfo* method_info,
+                                                     uint16_t vreg,
+                                                     bool is64bitValue,
+                                                     uint32_t dex_pc_low,
+                                                     uint32_t dex_pc_high) {
+    std::vector<VariableLocation> variable_locations;
+
+    // Get stack maps sorted by pc (they might not be sorted internally).
+    const CodeInfo code_info(method_info->compiled_method_->GetVmapTable().data());
+    const StackMapEncoding encoding = code_info.ExtractEncoding();
+    std::map<uint32_t, StackMap> stack_maps;
+    for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) {
+      StackMap stack_map = code_info.GetStackMapAt(s, encoding);
+      DCHECK(stack_map.IsValid());
+      const uint32_t low_pc = method_info->low_pc_ + stack_map.GetNativePcOffset(encoding);
+      DCHECK_LE(low_pc, method_info->high_pc_);
+      stack_maps.emplace(low_pc, stack_map);
+    }
+
+    // Create entries for the requested register based on stack map data.
+    for (auto it = stack_maps.begin(); it != stack_maps.end(); it++) {
+      const StackMap& stack_map = it->second;
+      const uint32_t low_pc = it->first;
+      auto next_it = it;
+      next_it++;
+      const uint32_t high_pc = next_it != stack_maps.end() ? next_it->first
+                                                           : method_info->high_pc_;
+      DCHECK_LE(low_pc, high_pc);
+      if (low_pc == high_pc) {
+        continue;  // Ignore if the address range is empty.
+      }
+
+      // Check that the stack map is in the requested range.
+      uint32_t dex_pc = stack_map.GetDexPc(encoding);
+      if (!(dex_pc_low <= dex_pc && dex_pc < dex_pc_high)) {
+        continue;
+      }
+
+      // Find the location of the dex register.
+      DexRegisterLocation reg_lo = DexRegisterLocation::None();
+      DexRegisterLocation reg_hi = DexRegisterLocation::None();
+      if (stack_map.HasDexRegisterMap(encoding)) {
+        DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(
+            stack_map, encoding, method_info->code_item_->registers_size_);
+        reg_lo = dex_register_map.GetDexRegisterLocation(
+            vreg, method_info->code_item_->registers_size_, code_info, encoding);
+        if (is64bitValue) {
+          reg_hi = dex_register_map.GetDexRegisterLocation(
+              vreg + 1, method_info->code_item_->registers_size_, code_info, encoding);
+        }
+      }
+
+      // Add location entry for this address range.
+      if (!variable_locations.empty() &&
+          variable_locations.back().reg_lo == reg_lo &&
+          variable_locations.back().reg_hi == reg_hi &&
+          variable_locations.back().high_pc == low_pc) {
+        // Merge with the previous entry (extend its range).
+        variable_locations.back().high_pc = high_pc;
+      } else {
+        variable_locations.push_back({low_pc, high_pc, reg_lo, reg_hi});
+      }
+    }
+
+    return variable_locations;
+  }
+
+  bool IsFromOptimizingCompiler(const MethodDebugInfo* method_info) {
+    return method_info->compiled_method_->GetQuickCode().size() > 0 &&
+           method_info->compiled_method_->GetVmapTable().size() > 0 &&
+           method_info->compiled_method_->GetGcMap().size() == 0 &&
+           method_info->code_item_ != nullptr;
+  }
+}  // namespace
+
+// Helper class to write .debug_info and its supporting sections.
 template<typename ElfTypes>
 class DebugInfoWriter {
   typedef typename ElfTypes::Addr Elf_Addr;
 
+  // Helper class to write one compilation unit.
+  // It holds helper methods and temporary state.
+  class CompilationUnitWriter {
+   public:
+    explicit CompilationUnitWriter(DebugInfoWriter* owner)
+      : owner_(owner),
+        info_(Is64BitInstructionSet(owner_->builder_->GetIsa()), &debug_abbrev_) {
+    }
+
+    void Write(const CompilationUnit& compilation_unit) {
+      CHECK(!compilation_unit.methods_.empty());
+      const Elf_Addr text_address = owner_->builder_->GetText()->GetAddress();
+
+      info_.StartTag(DW_TAG_compile_unit);
+      info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
+      info_.WriteData1(DW_AT_language, DW_LANG_Java);
+      info_.WriteStrp(DW_AT_comp_dir, owner_->WriteString("$JAVA_SRC_ROOT"));
+      info_.WriteAddr(DW_AT_low_pc, text_address + compilation_unit.low_pc_);
+      info_.WriteUdata(DW_AT_high_pc, compilation_unit.high_pc_ - compilation_unit.low_pc_);
+      info_.WriteSecOffset(DW_AT_stmt_list, compilation_unit.debug_line_offset_);
+
+      const char* last_dex_class_desc = nullptr;
+      for (auto mi : compilation_unit.methods_) {
+        const DexFile* dex = mi->dex_file_;
+        const DexFile::CodeItem* dex_code = mi->code_item_;
+        const DexFile::MethodId& dex_method = dex->GetMethodId(mi->dex_method_index_);
+        const DexFile::ProtoId& dex_proto = dex->GetMethodPrototype(dex_method);
+        const DexFile::TypeList* dex_params = dex->GetProtoParameters(dex_proto);
+        const char* dex_class_desc = dex->GetMethodDeclaringClassDescriptor(dex_method);
+        const bool is_static = (mi->access_flags_ & kAccStatic) != 0;
+
+        // Enclose the method in the correct class definition.
+        if (last_dex_class_desc != dex_class_desc) {
+          if (last_dex_class_desc != nullptr) {
+            EndClassTag(last_dex_class_desc);
+          }
+          size_t offset = StartClassTag(dex_class_desc);
+          type_cache_.emplace(dex_class_desc, offset);
+          // Check that each class is defined only once.
+          bool unique = owner_->defined_dex_classes_.insert(dex_class_desc).second;
+          CHECK(unique) << "Redefinition of " << dex_class_desc;
+          last_dex_class_desc = dex_class_desc;
+        }
+
+        int start_depth = info_.Depth();
+        info_.StartTag(DW_TAG_subprogram);
+        WriteName(dex->GetMethodName(dex_method));
+        info_.WriteAddr(DW_AT_low_pc, text_address + mi->low_pc_);
+        info_.WriteUdata(DW_AT_high_pc, mi->high_pc_ - mi->low_pc_);
+        uint8_t frame_base[] = { DW_OP_call_frame_cfa };
+        info_.WriteExprLoc(DW_AT_frame_base, &frame_base, sizeof(frame_base));
+        WriteLazyType(dex->GetReturnTypeDescriptor(dex_proto));
+
+        // Write parameters. DecodeDebugLocalInfo returns them as well, but it does not
+        // guarantee order or uniqueness, so it is safer to iterate over them manually.
+        // DecodeDebugLocalInfo might also not be available if there is no debug info.
+        std::vector<const char*> param_names = GetParamNames(mi);
+        uint32_t arg_reg = 0;
+        if (!is_static) {
+          info_.StartTag(DW_TAG_formal_parameter);
+          WriteName("this");
+          info_.WriteFlag(DW_AT_artificial, true);
+          WriteLazyType(dex_class_desc);
+          if (dex_code != nullptr) {
+            // Write the stack location of the parameter.
+            const uint32_t vreg = dex_code->registers_size_ - dex_code->ins_size_ + arg_reg;
+            const bool is64bitValue = false;
+            WriteRegLocation(mi, vreg, is64bitValue, compilation_unit.low_pc_);
+          }
+          arg_reg++;
+          info_.EndTag();
+        }
+        if (dex_params != nullptr) {
+          for (uint32_t i = 0; i < dex_params->Size(); ++i) {
+            info_.StartTag(DW_TAG_formal_parameter);
+            // Parameter names may not always be available.
+            if (i < param_names.size()) {
+              WriteName(param_names[i]);
+            }
+            // Write the type.
+            const char* type_desc = dex->StringByTypeIdx(dex_params->GetTypeItem(i).type_idx_);
+            WriteLazyType(type_desc);
+            const bool is64bitValue = type_desc[0] == 'D' || type_desc[0] == 'J';
+            if (dex_code != nullptr) {
+              // Write the stack location of the parameter.
+              const uint32_t vreg = dex_code->registers_size_ - dex_code->ins_size_ + arg_reg;
+              WriteRegLocation(mi, vreg, is64bitValue, compilation_unit.low_pc_);
+            }
+            arg_reg += is64bitValue ? 2 : 1;
+            info_.EndTag();
+          }
+          if (dex_code != nullptr) {
+            DCHECK_EQ(arg_reg, dex_code->ins_size_);
+          }
+        }
+
+        // Write local variables.
+        LocalInfos local_infos;
+        if (dex->DecodeDebugLocalInfo(dex_code,
+                                      is_static,
+                                      mi->dex_method_index_,
+                                      LocalInfoCallback,
+                                      &local_infos)) {
+          for (const DexFile::LocalInfo& var : local_infos) {
+            if (var.reg_ < dex_code->registers_size_ - dex_code->ins_size_) {
+              info_.StartTag(DW_TAG_variable);
+              WriteName(var.name_);
+              WriteLazyType(var.descriptor_);
+              bool is64bitValue = var.descriptor_[0] == 'D' || var.descriptor_[0] == 'J';
+              WriteRegLocation(mi, var.reg_, is64bitValue, compilation_unit.low_pc_,
+                               var.start_address_, var.end_address_);
+              info_.EndTag();
+            }
+          }
+        }
+
+        info_.EndTag();
+        CHECK_EQ(info_.Depth(), start_depth);  // Balanced start/end.
+      }
+      if (last_dex_class_desc != nullptr) {
+        EndClassTag(last_dex_class_desc);
+      }
+      CHECK_EQ(info_.Depth(), 1);
+      FinishLazyTypes();
+      info_.EndTag();  // DW_TAG_compile_unit
+      std::vector<uint8_t> buffer;
+      buffer.reserve(info_.data()->size() + KB);
+      const size_t offset = owner_->builder_->GetDebugInfo()->GetSize();
+      const size_t debug_abbrev_offset =
+          owner_->debug_abbrev_.Insert(debug_abbrev_.data(), debug_abbrev_.size());
+      WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_);
+      owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
+    }
+
+    // Write a table into .debug_loc which describes the location of a dex register.
+    // The dex register might be valid only at some points, and it might
+    // move between machine registers and the stack.
+    void WriteRegLocation(const MethodDebugInfo* method_info,
+                          uint16_t vreg,
+                          bool is64bitValue,
+                          uint32_t compilation_unit_low_pc,
+                          uint32_t dex_pc_low = 0,
+                          uint32_t dex_pc_high = 0xFFFFFFFF) {
+      using Kind = DexRegisterLocation::Kind;
+      if (!IsFromOptimizingCompiler(method_info)) {
+        return;
+      }
+
+      Writer<> debug_loc(&owner_->debug_loc_);
+      Writer<> debug_ranges(&owner_->debug_ranges_);
+      info_.WriteSecOffset(DW_AT_location, debug_loc.size());
+      info_.WriteSecOffset(DW_AT_start_scope, debug_ranges.size());
+
+      std::vector<VariableLocation> variable_locations = GetVariableLocations(
+          method_info,
+          vreg,
+          is64bitValue,
+          dex_pc_low,
+          dex_pc_high);
+
+      // Write .debug_loc entries.
+      const InstructionSet isa = owner_->builder_->GetIsa();
+      const bool is64bit = Is64BitInstructionSet(isa);
+      for (const VariableLocation& variable_location : variable_locations) {
+        // Translate dex register location to DWARF expression.
+        // Note that a 64-bit value might be split into two distinct locations
+        // (for example, two 32-bit machine registers, or even a stack slot and a register).
+        uint8_t buffer[64];
+        uint8_t* pos = buffer;
+        DexRegisterLocation reg_lo = variable_location.reg_lo;
+        DexRegisterLocation reg_hi = variable_location.reg_hi;
+        for (int piece = 0; piece < (is64bitValue ? 2 : 1); piece++) {
+          DexRegisterLocation reg_loc = (piece == 0 ? reg_lo : reg_hi);
+          const Kind kind = reg_loc.GetKind();
+          const int32_t value = reg_loc.GetValue();
+          if (kind == Kind::kInStack) {
+            const size_t frame_size = method_info->compiled_method_->GetFrameSizeInBytes();
+            *(pos++) = DW_OP_fbreg;
+            // The stack offset is relative to SP. Make it relative to CFA.
+            pos = EncodeSignedLeb128(pos, value - frame_size);
+            if (piece == 0 && reg_hi.GetKind() == Kind::kInStack &&
+                reg_hi.GetValue() == value + 4) {
+              break;  // the high word is correctly implied by the low word.
+            }
+          } else if (kind == Kind::kInRegister) {
+            pos = WriteOpReg(pos, GetDwarfCoreReg(isa, value).num());
+            if (piece == 0 && reg_hi.GetKind() == Kind::kInRegisterHigh &&
+                reg_hi.GetValue() == value) {
+              break;  // the high word is correctly implied by the low word.
+            }
+          } else if (kind == Kind::kInFpuRegister) {
+            if ((isa == kArm || isa == kThumb2) &&
+                piece == 0 && reg_hi.GetKind() == Kind::kInFpuRegister &&
+                reg_hi.GetValue() == value + 1 && value % 2 == 0) {
+              // Translate S register pair to D register (e.g. S4+S5 to D2).
+              pos = WriteOpReg(pos, Reg::ArmDp(value / 2).num());
+              break;
+            }
+            if (isa == kMips || isa == kMips64) {
+              // TODO: Find what the DWARF floating point register numbers are on MIPS.
+              break;
+            }
+            pos = WriteOpReg(pos, GetDwarfFpReg(isa, value).num());
+            if (piece == 0 && reg_hi.GetKind() == Kind::kInFpuRegisterHigh &&
+                reg_hi.GetValue() == reg_lo.GetValue()) {
+              break;  // the high word is correctly implied by the low word.
+            }
+          } else if (kind == Kind::kConstant) {
+            *(pos++) = DW_OP_consts;
+            pos = EncodeSignedLeb128(pos, value);
+            *(pos++) = DW_OP_stack_value;
+          } else if (kind == Kind::kNone) {
+            break;
+          } else {
+            // kInStackLargeOffset and kConstantLargeValue are hidden by GetKind().
+            // kInRegisterHigh and kInFpuRegisterHigh should be handled by
+            // the special cases above and they should not occur alone.
+            LOG(ERROR) << "Unexpected register location kind: "
+                       << DexRegisterLocation::PrettyDescriptor(kind);
+            break;
+          }
+          if (is64bitValue) {
+            // Write the marker which is needed by split 64-bit values.
+            // This code is skipped by the special cases.
+            *(pos++) = DW_OP_piece;
+            pos = EncodeUnsignedLeb128(pos, 4);
+          }
+        }
+
+        // Check that the buffer is large enough; keep half of it empty for safety.
+        DCHECK_LE(static_cast<size_t>(pos - buffer), sizeof(buffer) / 2);
+        if (pos > buffer) {
+          if (is64bit) {
+            debug_loc.PushUint64(variable_location.low_pc - compilation_unit_low_pc);
+            debug_loc.PushUint64(variable_location.high_pc - compilation_unit_low_pc);
+          } else {
+            debug_loc.PushUint32(variable_location.low_pc - compilation_unit_low_pc);
+            debug_loc.PushUint32(variable_location.high_pc - compilation_unit_low_pc);
+          }
+          // Write the expression.
+          debug_loc.PushUint16(pos - buffer);
+          debug_loc.PushData(buffer, pos - buffer);
+        } else {
+          // Do not generate .debug_loc if the location is not known.
+        }
+      }
+      // Write end-of-list entry.
+      if (is64bit) {
+        debug_loc.PushUint64(0);
+        debug_loc.PushUint64(0);
+      } else {
+        debug_loc.PushUint32(0);
+        debug_loc.PushUint32(0);
+      }
+
+      // Write .debug_ranges entries.
+      // This includes ranges where the variable is in scope but the location is not known.
+      for (size_t i = 0; i < variable_locations.size(); i++) {
+        uint32_t low_pc = variable_locations[i].low_pc;
+        uint32_t high_pc = variable_locations[i].high_pc;
+        while (i + 1 < variable_locations.size() && variable_locations[i+1].low_pc == high_pc) {
+          // Merge address range with the next entry.
+          high_pc = variable_locations[++i].high_pc;
+        }
+        if (is64bit) {
+          debug_ranges.PushUint64(low_pc - compilation_unit_low_pc);
+          debug_ranges.PushUint64(high_pc - compilation_unit_low_pc);
+        } else {
+          debug_ranges.PushUint32(low_pc - compilation_unit_low_pc);
+          debug_ranges.PushUint32(high_pc - compilation_unit_low_pc);
+        }
+      }
+      // Write end-of-list entry.
+      if (is64bit) {
+        debug_ranges.PushUint64(0);
+        debug_ranges.PushUint64(0);
+      } else {
+        debug_ranges.PushUint32(0);
+        debug_ranges.PushUint32(0);
+      }
+    }
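
For readers unfamiliar with DWARF location expressions, here is a minimal standalone sketch (not ART code) of the kind of byte sequence the loop above emits for a 64-bit value split across two locations. The opcode values are the DWARF4 constants; the register number and stack offset are invented for illustration.

    #include <cstdint>
    #include <vector>

    enum : uint8_t {
      DW_OP_reg0  = 0x50,  // Value lives in register (0 + n).
      DW_OP_fbreg = 0x91,  // Value lives at frame base + SLEB128 offset.
      DW_OP_piece = 0x93,  // Previous op described only part of the value.
    };

    // Low 4 bytes in core register r1, high 4 bytes at CFA-8.
    std::vector<uint8_t> SplitLongLocation() {
      return {
        static_cast<uint8_t>(DW_OP_reg0 + 1),  // DW_OP_reg1...
        DW_OP_piece, 4,                        // ...is the low 4-byte piece.
        DW_OP_fbreg, 0x78,                     // SLEB128(-8): frame base - 8...
        DW_OP_piece, 4,                        // ...is the high 4-byte piece.
      };
    }
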
+
+    // Some types are difficult to define as we go since they need
+    // to be enclosed in the right set of namespaces. Therefore we
+    // just define all types lazily at the end of the compilation unit.
+    void WriteLazyType(const char* type_descriptor) {
+      if (type_descriptor != nullptr && type_descriptor[0] != 'V') {
+        lazy_types_.emplace(type_descriptor, info_.size());
+        info_.WriteRef4(DW_AT_type, 0);
+      }
+    }
+
+    void FinishLazyTypes() {
+      for (const auto& lazy_type : lazy_types_) {
+        info_.UpdateUint32(lazy_type.second, WriteType(lazy_type.first));
+      }
+      lazy_types_.clear();
+    }
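
The lazy-type mechanism above boils down to: reserve four bytes now, remember their offset, and patch them once the type DIE exists. A hedged sketch of the same pattern with a plain byte buffer (all names here are hypothetical, not ART API):

    #include <cstdint>
    #include <cstring>
    #include <map>
    #include <string>
    #include <vector>

    std::vector<uint8_t> buffer;              // Stand-in for the .debug_info bytes.
    std::multimap<std::string, size_t> lazy;  // Type descriptor -> patch offset.

    void WriteLazyRef(const std::string& desc) {
      lazy.emplace(desc, buffer.size());      // Remember where the placeholder is.
      buffer.insert(buffer.end(), 4, 0);      // 4-byte placeholder reference.
    }

    void PatchRef(size_t patch_offset, uint32_t type_offset) {
      std::memcpy(&buffer[patch_offset], &type_offset, sizeof(type_offset));
    }
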
+
+   private:
+    void WriteName(const char* name) {
+      if (name != nullptr) {
+        info_.WriteStrp(DW_AT_name, owner_->WriteString(name));
+      }
+    }
+
+    // Helper which writes DWARF expression referencing a register.
+    static uint8_t* WriteOpReg(uint8_t* buffer, uint32_t dwarf_reg_num) {
+      if (dwarf_reg_num < 32) {
+        *(buffer++) = DW_OP_reg0 + dwarf_reg_num;
+      } else {
+        *(buffer++) = DW_OP_regx;
+        buffer = EncodeUnsignedLeb128(buffer, dwarf_reg_num);
+      }
+      return buffer;
+    }
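
A short worked example of WriteOpReg, assuming the DWARF4 opcode values DW_OP_reg0 = 0x50 and DW_OP_regx = 0x90: registers below 32 get a dedicated one-byte opcode, anything else takes the two-byte escape.

    uint8_t a[8], b[8];
    WriteOpReg(a, 5);   // a = { 0x55 }        -- DW_OP_reg5
    WriteOpReg(b, 40);  // b = { 0x90, 0x28 }  -- DW_OP_regx, ULEB128(40)
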
+
+    // Convert dex type descriptor to DWARF.
+    // Returns offset in the compilation unit.
+    size_t WriteType(const char* desc) {
+      const auto& it = type_cache_.find(desc);
+      if (it != type_cache_.end()) {
+        return it->second;
+      }
+
+      size_t offset;
+      if (*desc == 'L') {
+        // Class type. For example: Lpackage/name;
+        offset = StartClassTag(desc);
+        info_.WriteFlag(DW_AT_declaration, true);
+        EndClassTag(desc);
+      } else if (*desc == '[') {
+        // Array type.
+        size_t element_type = WriteType(desc + 1);
+        offset = info_.StartTag(DW_TAG_array_type);
+        info_.WriteRef(DW_AT_type, element_type);
+        info_.EndTag();
+      } else {
+        // Primitive types.
+        const char* name;
+        uint32_t encoding;
+        uint32_t byte_size;
+        switch (*desc) {
+        case 'B':
+          name = "byte";
+          encoding = DW_ATE_signed;
+          byte_size = 1;
+          break;
+        case 'C':
+          name = "char";
+          encoding = DW_ATE_UTF;
+          byte_size = 2;
+          break;
+        case 'D':
+          name = "double";
+          encoding = DW_ATE_float;
+          byte_size = 8;
+          break;
+        case 'F':
+          name = "float";
+          encoding = DW_ATE_float;
+          byte_size = 4;
+          break;
+        case 'I':
+          name = "int";
+          encoding = DW_ATE_signed;
+          byte_size = 4;
+          break;
+        case 'J':
+          name = "long";
+          encoding = DW_ATE_signed;
+          byte_size = 8;
+          break;
+        case 'S':
+          name = "short";
+          encoding = DW_ATE_signed;
+          byte_size = 2;
+          break;
+        case 'Z':
+          name = "boolean";
+          encoding = DW_ATE_boolean;
+          byte_size = 1;
+          break;
+        case 'V':
+          LOG(FATAL) << "Void type should not be encoded";
+          UNREACHABLE();
+        default:
+          LOG(FATAL) << "Unknown dex type descriptor: " << desc;
+          UNREACHABLE();
+        }
+        offset = info_.StartTag(DW_TAG_base_type);
+        WriteName(name);
+        info_.WriteData1(DW_AT_encoding, encoding);
+        info_.WriteData1(DW_AT_byte_size, byte_size);
+        info_.EndTag();
+      }
+
+      type_cache_.emplace(desc, offset);
+      return offset;
+    }
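
The descriptor grammar handled by WriteType is small: one letter per primitive, 'L...;' for classes, and a '[' prefix per array dimension. A self-contained sketch that renders a descriptor as a readable name under those rules (the helper name is hypothetical):

    #include <string>

    std::string DescriptorToName(const char* desc) {
      switch (*desc) {
        case 'B': return "byte";
        case 'C': return "char";
        case 'D': return "double";
        case 'F': return "float";
        case 'I': return "int";
        case 'J': return "long";
        case 'S': return "short";
        case 'Z': return "boolean";
        case '[': return DescriptorToName(desc + 1) + "[]";  // Array of element type.
        case 'L': {
          std::string name(desc + 1);
          name.pop_back();  // Drop the trailing ';'; '/' separators are kept.
          return name;
        }
        default:  return "?";
      }
    }
    // DescriptorToName("[[J") == "long[][]";
    // DescriptorToName("Ljava/lang/String;") == "java/lang/String".
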
+
+    // Start DW_TAG_class_type tag nested in DW_TAG_namespace tags.
+    // Returns offset of the class tag in the compilation unit.
+    size_t StartClassTag(const char* desc) {
+      DCHECK(desc != nullptr && desc[0] == 'L');
+      // Enclose the type in namespace tags.
+      const char* end;
+      for (desc = desc + 1; (end = strchr(desc, '/')) != nullptr; desc = end + 1) {
+        info_.StartTag(DW_TAG_namespace);
+        WriteName(std::string(desc, end - desc).c_str());
+      }
+      // Start the class tag.
+      size_t offset = info_.StartTag(DW_TAG_class_type);
+      end = strchr(desc, ';');
+      CHECK(end != nullptr);
+      WriteName(std::string(desc, end - desc).c_str());
+      return offset;
+    }
+
+    void EndClassTag(const char* desc) {
+      DCHECK(desc != nullptr && desc[0] == 'L');
+      // End the class tag.
+      info_.EndTag();
+      // Close namespace tags.
+      const char* end;
+      for (desc = desc + 1; (end = strchr(desc, '/')) != nullptr; desc = end + 1) {
+        info_.EndTag();
+      }
+    }
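
For example, for the descriptor "Ljava/lang/String;" the two helpers above produce a DIE tree of the shape:

    DW_TAG_namespace "java"
      DW_TAG_namespace "lang"
        DW_TAG_class_type "String"

StartClassTag opens one DW_TAG_namespace per '/'-separated component and EndClassTag closes the same number again, which is why both walk the descriptor with the same strchr loop.
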
+
+    // For access to the ELF sections.
+    DebugInfoWriter<ElfTypes>* owner_;
+    // Debug abbrevs for this compilation unit only.
+    std::vector<uint8_t> debug_abbrev_;
+    // Temporary buffer to create and store the entries.
+    DebugInfoEntryWriter<> info_;
+    // Cache of already translated type descriptors.
+    std::map<const char*, size_t, CStringLess> type_cache_;  // type_desc -> definition_offset.
+    // 32-bit references which need to be resolved to a type later.
+    std::multimap<const char*, size_t, CStringLess> lazy_types_;  // type_desc -> patch_offset.
+  };
+
  public:
   explicit DebugInfoWriter(ElfBuilder<ElfTypes>* builder) : builder_(builder) {
   }
@@ -261,54 +878,34 @@
     builder_->GetDebugInfo()->Start();
   }
 
-  void Write(const std::vector<const OatWriter::DebugInfo*>& method_infos,
-             size_t debug_line_offset) {
-    const bool is64bit = Is64BitInstructionSet(builder_->GetIsa());
-    const Elf_Addr text_address = builder_->GetText()->GetAddress();
-    uint32_t cunit_low_pc = 0xFFFFFFFFU;
-    uint32_t cunit_high_pc = 0;
-    for (auto method_info : method_infos) {
-      cunit_low_pc = std::min(cunit_low_pc, method_info->low_pc_);
-      cunit_high_pc = std::max(cunit_high_pc, method_info->high_pc_);
-    }
-
-    size_t debug_abbrev_offset = debug_abbrev_.size();
-    DebugInfoEntryWriter<> info(is64bit, &debug_abbrev_);
-    info.StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes);
-    info.WriteStrp(DW_AT_producer, "Android dex2oat", &debug_str_);
-    info.WriteData1(DW_AT_language, DW_LANG_Java);
-    info.WriteAddr(DW_AT_low_pc, text_address + cunit_low_pc);
-    info.WriteAddr(DW_AT_high_pc, text_address + cunit_high_pc);
-    info.WriteData4(DW_AT_stmt_list, debug_line_offset);
-    for (auto method_info : method_infos) {
-      std::string method_name = PrettyMethod(method_info->dex_method_index_,
-                                             *method_info->dex_file_, true);
-      info.StartTag(DW_TAG_subprogram, DW_CHILDREN_no);
-      info.WriteStrp(DW_AT_name, method_name.data(), &debug_str_);
-      info.WriteAddr(DW_AT_low_pc, text_address + method_info->low_pc_);
-      info.WriteAddr(DW_AT_high_pc, text_address + method_info->high_pc_);
-      info.EndTag();  // DW_TAG_subprogram
-    }
-    info.EndTag();  // DW_TAG_compile_unit
-    std::vector<uint8_t> buffer;
-    buffer.reserve(info.data()->size() + KB);
-    size_t offset = builder_->GetDebugInfo()->GetSize();
-    WriteDebugInfoCU(debug_abbrev_offset, info, offset, &buffer, &debug_info_patches_);
-    builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
+  void WriteCompilationUnit(const CompilationUnit& compilation_unit) {
+    CompilationUnitWriter writer(this);
+    writer.Write(compilation_unit);
   }
 
   void End() {
     builder_->GetDebugInfo()->End();
-    builder_->WritePatches(".debug_info.oat_patches", &debug_info_patches_);
-    builder_->WriteSection(".debug_abbrev", &debug_abbrev_);
-    builder_->WriteSection(".debug_str", &debug_str_);
+    builder_->WritePatches(".debug_info.oat_patches",
+                           ArrayRef<const uintptr_t>(debug_info_patches_));
+    builder_->WriteSection(".debug_abbrev", &debug_abbrev_.Data());
+    builder_->WriteSection(".debug_str", &debug_str_.Data());
+    builder_->WriteSection(".debug_loc", &debug_loc_);
+    builder_->WriteSection(".debug_ranges", &debug_ranges_);
   }
 
  private:
+  size_t WriteString(const char* str) {
+    return debug_str_.Insert(reinterpret_cast<const uint8_t*>(str), strlen(str) + 1);
+  }
+
   ElfBuilder<ElfTypes>* builder_;
   std::vector<uintptr_t> debug_info_patches_;
-  std::vector<uint8_t> debug_abbrev_;
-  std::vector<uint8_t> debug_str_;
+  DedupVector debug_abbrev_;
+  DedupVector debug_str_;
+  std::vector<uint8_t> debug_loc_;
+  std::vector<uint8_t> debug_ranges_;
+
+  std::unordered_set<const char*> defined_dex_classes_;  // For CHECKs only.
 };
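
DedupVector itself is not shown in this diff; judging from the Insert()/Data() calls above, its contract is roughly "return the offset of an existing copy of these bytes, or append them and return the new offset". A minimal sketch under that assumption:

    #include <cstdint>
    #include <map>
    #include <string>
    #include <vector>

    class DedupBuffer {
     public:
      size_t Insert(const uint8_t* data, size_t size) {
        std::string key(reinterpret_cast<const char*>(data), size);
        auto it = cache_.find(key);
        if (it != cache_.end()) {
          return it->second;  // Identical bytes already stored; reuse them.
        }
        size_t offset = data_.size();
        data_.insert(data_.end(), data, data + size);
        cache_.emplace(std::move(key), offset);
        return offset;
      }
      const std::vector<uint8_t>& Data() const { return data_; }

     private:
      std::map<std::string, size_t> cache_;  // Bytes -> offset in data_.
      std::vector<uint8_t> data_;
    };
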
 
 template<typename ElfTypes>
@@ -325,15 +922,11 @@
 
   // Write line table for given set of methods.
   // Returns the number of bytes written.
-  size_t Write(const std::vector<const OatWriter::DebugInfo*>& method_infos) {
+  size_t WriteCompilationUnit(CompilationUnit& compilation_unit) {
     const bool is64bit = Is64BitInstructionSet(builder_->GetIsa());
     const Elf_Addr text_address = builder_->GetText()->GetAddress();
-    uint32_t cunit_low_pc = 0xFFFFFFFFU;
-    uint32_t cunit_high_pc = 0;
-    for (auto method_info : method_infos) {
-      cunit_low_pc = std::min(cunit_low_pc, method_info->low_pc_);
-      cunit_high_pc = std::max(cunit_high_pc, method_info->high_pc_);
-    }
+
+    compilation_unit.debug_line_offset_ = builder_->GetDebugLine()->GetSize();
 
     std::vector<FileEntry> files;
     std::unordered_map<std::string, size_t> files_map;
@@ -358,30 +951,53 @@
         break;
     }
     DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits_);
-    opcodes.SetAddress(text_address + cunit_low_pc);
-    if (dwarf_isa != -1) {
-      opcodes.SetISA(dwarf_isa);
-    }
-    for (const OatWriter::DebugInfo* mi : method_infos) {
-      struct DebugInfoCallbacks {
-        static bool NewPosition(void* ctx, uint32_t address, uint32_t line) {
-          auto* context = reinterpret_cast<DebugInfoCallbacks*>(ctx);
-          context->dex2line_.push_back({address, static_cast<int32_t>(line)});
-          return false;
+    for (const MethodDebugInfo* mi : compilation_unit.methods_) {
+      // Ignore the function if we have already generated a line table for the same
+      // address. It would confuse the debugger, and the DWARF specification forbids it.
+      if (mi->deduped_) {
+        continue;
+      }
+
+      ArrayRef<const SrcMapElem> src_mapping_table;
+      std::vector<SrcMapElem> src_mapping_table_from_stack_maps;
+      if (IsFromOptimizingCompiler(mi)) {
+        // Use stack maps to create mapping table from pc to dex.
+        const CodeInfo code_info(mi->compiled_method_->GetVmapTable().data());
+        const StackMapEncoding encoding = code_info.ExtractEncoding();
+        for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) {
+          StackMap stack_map = code_info.GetStackMapAt(s, encoding);
+          DCHECK(stack_map.IsValid());
+          const uint32_t pc = stack_map.GetNativePcOffset(encoding);
+          const int32_t dex = stack_map.GetDexPc(encoding);
+          src_mapping_table_from_stack_maps.push_back({pc, dex});
         }
-        DefaultSrcMap dex2line_;
-      } debug_info_callbacks;
+        std::sort(src_mapping_table_from_stack_maps.begin(),
+                  src_mapping_table_from_stack_maps.end());
+        src_mapping_table = ArrayRef<const SrcMapElem>(src_mapping_table_from_stack_maps);
+      } else {
+        // Use the mapping table provided by the quick compiler.
+        src_mapping_table = mi->compiled_method_->GetSrcMappingTable();
+      }
+
+      if (src_mapping_table.empty()) {
+        continue;
+      }
 
       Elf_Addr method_address = text_address + mi->low_pc_;
 
+      PositionInfos position_infos;
       const DexFile* dex = mi->dex_file_;
-      if (mi->code_item_ != nullptr) {
-        dex->DecodeDebugInfo(mi->code_item_,
-                             (mi->access_flags_ & kAccStatic) != 0,
-                             mi->dex_method_index_,
-                             DebugInfoCallbacks::NewPosition,
-                             nullptr,
-                             &debug_info_callbacks);
+      if (!dex->DecodeDebugPositionInfo(mi->code_item_, PositionInfoCallback, &position_infos)) {
+        continue;
+      }
+
+      if (position_infos.empty()) {
+        continue;
+      }
+
+      opcodes.SetAddress(method_address);
+      if (dwarf_isa != -1) {
+        opcodes.SetISA(dwarf_isa);
       }
 
       // Get and deduplicate directory and filename.
@@ -430,20 +1046,23 @@
       opcodes.SetFile(file_index);
 
       // Generate mapping opcodes from PC to Java lines.
-      const DefaultSrcMap& dex2line_map = debug_info_callbacks.dex2line_;
-      if (file_index != 0 && !dex2line_map.empty()) {
+      if (file_index != 0) {
         bool first = true;
-        for (SrcMapElem pc2dex : mi->compiled_method_->GetSrcMappingTable()) {
+        for (SrcMapElem pc2dex : src_mapping_table) {
           uint32_t pc = pc2dex.from_;
           int dex_pc = pc2dex.to_;
-          auto dex2line = dex2line_map.Find(static_cast<uint32_t>(dex_pc));
-          if (dex2line.first) {
-            int line = dex2line.second;
+          // Find the mapping whose address is greater than our dex pc; then go back one step.
+          auto ub = std::upper_bound(position_infos.begin(), position_infos.end(), dex_pc,
+              [](uint32_t address, const DexFile::PositionInfo& entry) {
+                  return address < entry.address_;
+              });
+          if (ub != position_infos.begin()) {
+            int line = (--ub)->line_;
             if (first) {
               first = false;
               if (pc > 0) {
                 // Assume that any preceding code is prologue.
-                int first_line = dex2line_map.front().to_;
+                int first_line = position_infos.front().line_;
                 // Prologue is not a sensible place for a breakpoint.
                 opcodes.NegateStmt();
                 opcodes.AddRow(method_address, first_line);
@@ -460,9 +1079,10 @@
         // line 0 - instruction cannot be attributed to any source line.
         opcodes.AddRow(method_address, 0);
       }
+
+      opcodes.AdvancePC(text_address + mi->high_pc_);
+      opcodes.EndSequence();
     }
-    opcodes.AdvancePC(text_address + cunit_high_pc);
-    opcodes.EndSequence();
     std::vector<uint8_t> buffer;
     buffer.reserve(opcodes.data()->size() + KB);
     size_t offset = builder_->GetDebugLine()->GetSize();
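
The upper_bound lookup above relies on position_infos being sorted by address: the entry covering dex_pc is the last one whose address_ is <= dex_pc. A self-contained sketch of the same lookup (the PositionInfo struct here is a stand-in for DexFile::PositionInfo):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct PositionInfo { uint32_t address_; uint32_t line_; };

    // Returns the source line covering dex_pc, or -1 if there is none.
    int FindLine(const std::vector<PositionInfo>& infos, uint32_t dex_pc) {
      auto ub = std::upper_bound(
          infos.begin(), infos.end(), dex_pc,
          [](uint32_t pc, const PositionInfo& entry) { return pc < entry.address_; });
      if (ub == infos.begin()) {
        return -1;  // dex_pc precedes the first known position.
      }
      return static_cast<int>((--ub)->line_);
    }
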
@@ -473,7 +1093,8 @@
 
   void End() {
     builder_->GetDebugLine()->End();
-    builder_->WritePatches(".debug_line.oat_patches", &debug_line_patches);
+    builder_->WritePatches(".debug_line.oat_patches",
+                           ArrayRef<const uintptr_t>(debug_line_patches));
   }
 
  private:
@@ -483,37 +1104,29 @@
 
 template<typename ElfTypes>
 void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                        const std::vector<OatWriter::DebugInfo>& method_infos) {
-  struct CompilationUnit {
-    std::vector<const OatWriter::DebugInfo*> methods_;
-    size_t debug_line_offset_ = 0;
-  };
-
+                        const ArrayRef<const MethodDebugInfo>& method_infos) {
   // Group the methods into compilation units based on source file.
   std::vector<CompilationUnit> compilation_units;
   const char* last_source_file = nullptr;
-  for (const OatWriter::DebugInfo& mi : method_infos) {
-    // Attribute given instruction range only to single method.
-    // Otherwise the debugger might get really confused.
-    if (!mi.deduped_) {
-      auto& dex_class_def = mi.dex_file_->GetClassDef(mi.class_def_index_);
-      const char* source_file = mi.dex_file_->GetSourceFile(dex_class_def);
-      if (compilation_units.empty() || source_file != last_source_file) {
-        compilation_units.push_back(CompilationUnit());
-      }
-      compilation_units.back().methods_.push_back(&mi);
-      last_source_file = source_file;
+  for (const MethodDebugInfo& mi : method_infos) {
+    auto& dex_class_def = mi.dex_file_->GetClassDef(mi.class_def_index_);
+    const char* source_file = mi.dex_file_->GetSourceFile(dex_class_def);
+    if (compilation_units.empty() || source_file != last_source_file) {
+      compilation_units.push_back(CompilationUnit());
     }
+    CompilationUnit& cu = compilation_units.back();
+    cu.methods_.push_back(&mi);
+    cu.low_pc_ = std::min(cu.low_pc_, mi.low_pc_);
+    cu.high_pc_ = std::max(cu.high_pc_, mi.high_pc_);
+    last_source_file = source_file;
   }
 
   // Write .debug_line section.
   {
     DebugLineWriter<ElfTypes> line_writer(builder);
     line_writer.Start();
-    size_t offset = 0;
     for (auto& compilation_unit : compilation_units) {
-      compilation_unit.debug_line_offset_ = offset;
-      offset += line_writer.Write(compilation_unit.methods_);
+      line_writer.WriteCompilationUnit(compilation_unit);
     }
     line_writer.End();
   }
@@ -523,27 +1136,93 @@
     DebugInfoWriter<ElfTypes> info_writer(builder);
     info_writer.Start();
     for (const auto& compilation_unit : compilation_units) {
-      info_writer.Write(compilation_unit.methods_, compilation_unit.debug_line_offset_);
+      info_writer.WriteCompilationUnit(compilation_unit);
     }
     info_writer.End();
   }
 }
 
+template <typename ElfTypes>
+void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
+                       const ArrayRef<const MethodDebugInfo>& method_infos) {
+  bool generated_mapping_symbol = false;
+  auto* strtab = builder->GetStrTab();
+  auto* symtab = builder->GetSymTab();
+
+  if (method_infos.empty()) {
+    return;
+  }
+
+  // Find all addresses (low_pc) which contain deduped methods.
+  // The first instance of a method is not marked deduped_, but the rest are.
+  std::unordered_set<uint32_t> deduped_addresses;
+  for (const MethodDebugInfo& info : method_infos) {
+    if (info.deduped_) {
+      deduped_addresses.insert(info.low_pc_);
+    }
+  }
+
+  strtab->Start();
+  strtab->Write("");  // strtab should start with empty string.
+  for (const MethodDebugInfo& info : method_infos) {
+    if (info.deduped_) {
+      continue;  // Add symbol only for the first instance.
+    }
+    std::string name = PrettyMethod(info.dex_method_index_, *info.dex_file_, true);
+    if (deduped_addresses.find(info.low_pc_) != deduped_addresses.end()) {
+      name += " [DEDUPED]";
+    }
+
+    uint32_t low_pc = info.low_pc_;
+    // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
+    low_pc += info.compiled_method_->CodeDelta();
+    symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
+                true, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
+
+    // Conforming to AAELF, add a $t mapping symbol to indicate the start of a sequence
+    // of Thumb2 instructions, so that disassembler tools can disassemble them correctly.
+    // Note that even if we generate just a single mapping symbol, ARM's Streamline
+    // requires it to match a function symbol; just address 0 does not work.
+    if (info.compiled_method_->GetInstructionSet() == kThumb2) {
+      if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
+        symtab->Add(strtab->Write("$t"), builder->GetText(), info.low_pc_ & ~1,
+                    true, 0, STB_LOCAL, STT_NOTYPE);
+        generated_mapping_symbol = true;
+      }
+    }
+  }
+  strtab->End();
+
+  // Symbols are buffered and written after names (because they are smaller).
+  // We could also do two passes in this function to avoid the buffering.
+  symtab->Start();
+  symtab->Write();
+  symtab->End();
+}
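
A concrete illustration of the address arithmetic above, assuming a Thumb2 method whose code starts at .text offset 0x1000 and whose CodeDelta() is 1 (the Thumb bit):

    low_pc = 0x1000 + 1 = 0x1001   // Function symbol: bit 0 marks Thumb code.
    info.low_pc_ & ~1   = 0x1000   // "$t" mapping symbol: must use the even address.
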
+
+template <typename ElfTypes>
+void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
+                    const ArrayRef<const MethodDebugInfo>& method_infos,
+                    CFIFormat cfi_format) {
+  if (!method_infos.empty()) {
+    // Add methods to .symtab.
+    WriteDebugSymbols(builder, method_infos);
+    // Generate CFI (stack unwinding information).
+    WriteCFISection(builder, method_infos, cfi_format);
+    // Write DWARF .debug_* sections.
+    WriteDebugSections(builder, method_infos);
+  }
+}
+
 // Explicit instantiations
-template void WriteCFISection<ElfTypes32>(
+template void WriteDebugInfo<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
-    const std::vector<OatWriter::DebugInfo>& method_infos,
-    CFIFormat format);
-template void WriteCFISection<ElfTypes64>(
+    const ArrayRef<const MethodDebugInfo>& method_infos,
+    CFIFormat cfi_format);
+template void WriteDebugInfo<ElfTypes64>(
     ElfBuilder<ElfTypes64>* builder,
-    const std::vector<OatWriter::DebugInfo>& method_infos,
-    CFIFormat format);
-template void WriteDebugSections<ElfTypes32>(
-    ElfBuilder<ElfTypes32>* builder,
-    const std::vector<OatWriter::DebugInfo>& method_infos);
-template void WriteDebugSections<ElfTypes64>(
-    ElfBuilder<ElfTypes64>* builder,
-    const std::vector<OatWriter::DebugInfo>& method_infos);
+    const ArrayRef<const MethodDebugInfo>& method_infos,
+    CFIFormat cfi_format);
 
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index e58fd0a..7ec0be1 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -17,23 +17,18 @@
 #ifndef ART_COMPILER_ELF_WRITER_DEBUG_H_
 #define ART_COMPILER_ELF_WRITER_DEBUG_H_
 
-#include <vector>
-
 #include "elf_builder.h"
 #include "dwarf/dwarf_constants.h"
 #include "oat_writer.h"
+#include "utils/array_ref.h"
 
 namespace art {
 namespace dwarf {
 
-template<typename ElfTypes>
-void WriteCFISection(ElfBuilder<ElfTypes>* builder,
-                     const std::vector<OatWriter::DebugInfo>& method_infos,
-                     CFIFormat format);
-
-template<typename ElfTypes>
-void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                        const std::vector<OatWriter::DebugInfo>& method_infos);
+template <typename ElfTypes>
+void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
+                    const ArrayRef<const MethodDebugInfo>& method_infos,
+                    CFIFormat cfi_format);
 
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 5c059e1..7b1bdd7 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -21,19 +21,18 @@
 
 #include "base/casts.h"
 #include "base/logging.h"
-#include "base/unix_file/fd_file.h"
+#include "base/stl_util.h"
 #include "compiled_method.h"
-#include "dex_file-inl.h"
-#include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
+#include "elf.h"
 #include "elf_builder.h"
-#include "elf_file.h"
 #include "elf_utils.h"
 #include "elf_writer_debug.h"
 #include "globals.h"
 #include "leb128.h"
-#include "oat.h"
-#include "oat_writer.h"
+#include "linker/buffered_output_stream.h"
+#include "linker/file_output_stream.h"
 #include "utils.h"
 
 namespace art {
@@ -47,147 +46,137 @@
 // Let's use .debug_frame because it is easier to strip or compress.
 constexpr dwarf::CFIFormat kCFIFormat = dwarf::DW_DEBUG_FRAME_FORMAT;
 
-// The ARM specification defines three special mapping symbols
-// $a, $t and $d which mark ARM, Thumb and data ranges respectively.
-// These symbols can be used by tools, for example, to pretty
-// print instructions correctly.  Objdump will use them if they
-// exist, but it will still work well without them.
-// However, these extra symbols take space, so let's just generate
-// one symbol which marks the whole .text section as code.
-constexpr bool kGenerateSingleArmMappingSymbol = true;
-
 template <typename ElfTypes>
-bool ElfWriterQuick<ElfTypes>::Create(File* elf_file,
-                                      OatWriter* oat_writer,
-                                      const std::vector<const DexFile*>& dex_files,
-                                      const std::string& android_root,
-                                      bool is_host,
-                                      const CompilerDriver& driver) {
-  ElfWriterQuick elf_writer(driver, elf_file);
-  return elf_writer.Write(oat_writer, dex_files, android_root, is_host);
+class ElfWriterQuick FINAL : public ElfWriter {
+ public:
+  ElfWriterQuick(InstructionSet instruction_set,
+                 const CompilerOptions* compiler_options,
+                 File* elf_file);
+  ~ElfWriterQuick();
+
+  void Start() OVERRIDE;
+  OutputStream* StartRoData() OVERRIDE;
+  void EndRoData(OutputStream* rodata) OVERRIDE;
+  OutputStream* StartText() OVERRIDE;
+  void EndText(OutputStream* text) OVERRIDE;
+  void SetBssSize(size_t bss_size) OVERRIDE;
+  void WriteDynamicSection() OVERRIDE;
+  void WriteDebugInfo(const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) OVERRIDE;
+  void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) OVERRIDE;
+  bool End() OVERRIDE;
+
+  virtual OutputStream* GetStream() OVERRIDE;
+
+  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
+                               std::vector<uint8_t>* buffer);
+
+ private:
+  const CompilerOptions* const compiler_options_;
+  File* const elf_file_;
+  std::unique_ptr<BufferedOutputStream> output_stream_;
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(ElfWriterQuick);
+};
+
+std::unique_ptr<ElfWriter> CreateElfWriterQuick(InstructionSet instruction_set,
+                                                const CompilerOptions* compiler_options,
+                                                File* elf_file) {
+  if (Is64BitInstructionSet(instruction_set)) {
+    return MakeUnique<ElfWriterQuick<ElfTypes64>>(instruction_set, compiler_options, elf_file);
+  } else {
+    return MakeUnique<ElfWriterQuick<ElfTypes32>>(instruction_set, compiler_options, elf_file);
+  }
 }
 
 template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer);
+ElfWriterQuick<ElfTypes>::ElfWriterQuick(InstructionSet instruction_set,
+                                         const CompilerOptions* compiler_options,
+                                         File* elf_file)
+    : ElfWriter(),
+      compiler_options_(compiler_options),
+      elf_file_(elf_file),
+      output_stream_(MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file))),
+      builder_(new ElfBuilder<ElfTypes>(instruction_set, output_stream_.get())) {}
 
 template <typename ElfTypes>
-bool ElfWriterQuick<ElfTypes>::Write(
-    OatWriter* oat_writer,
-    const std::vector<const DexFile*>& dex_files_unused ATTRIBUTE_UNUSED,
-    const std::string& android_root_unused ATTRIBUTE_UNUSED,
-    bool is_host_unused ATTRIBUTE_UNUSED) {
-  const InstructionSet isa = compiler_driver_->GetInstructionSet();
-  std::unique_ptr<BufferedOutputStream> output_stream(
-      new BufferedOutputStream(new FileOutputStream(elf_file_)));
-  std::unique_ptr<ElfBuilder<ElfTypes>> builder(
-      new ElfBuilder<ElfTypes>(isa, output_stream.get()));
+ElfWriterQuick<ElfTypes>::~ElfWriterQuick() {}
 
-  builder->Start();
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::Start() {
+  builder_->Start();
+}
 
-  auto* rodata = builder->GetRoData();
-  auto* text = builder->GetText();
-  auto* bss = builder->GetBss();
-
+template <typename ElfTypes>
+OutputStream* ElfWriterQuick<ElfTypes>::StartRoData() {
+  auto* rodata = builder_->GetRoData();
   rodata->Start();
-  if (!oat_writer->WriteRodata(rodata)) {
-    return false;
-  }
-  rodata->End();
+  return rodata;
+}
 
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::EndRoData(OutputStream* rodata) {
+  CHECK_EQ(builder_->GetRoData(), rodata);
+  builder_->GetRoData()->End();
+}
+
+template <typename ElfTypes>
+OutputStream* ElfWriterQuick<ElfTypes>::StartText() {
+  auto* text = builder_->GetText();
   text->Start();
-  if (!oat_writer->WriteCode(text)) {
-    return false;
-  }
-  text->End();
+  return text;
+}
 
-  if (oat_writer->GetBssSize() != 0) {
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::EndText(OutputStream* text) {
+  CHECK_EQ(builder_->GetText(), text);
+  builder_->GetText()->End();
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::SetBssSize(size_t bss_size) {
+  auto* bss = builder_->GetBss();
+  if (bss_size != 0u) {
     bss->Start();
-    bss->SetSize(oat_writer->GetBssSize());
+    bss->SetSize(bss_size);
     bss->End();
   }
-
-  builder->WriteDynamicSection(elf_file_->GetPath());
-
-  if (compiler_driver_->GetCompilerOptions().GetGenerateDebugInfo()) {
-    const auto& method_infos = oat_writer->GetMethodDebugInfo();
-    if (!method_infos.empty()) {
-      // Add methods to .symtab.
-      WriteDebugSymbols(builder.get(), oat_writer);
-      // Generate CFI (stack unwinding information).
-      dwarf::WriteCFISection(builder.get(), method_infos, kCFIFormat);
-      // Write DWARF .debug_* sections.
-      dwarf::WriteDebugSections(builder.get(), method_infos);
-    }
-  }
-
-  // Add relocation section for .text.
-  if (compiler_driver_->GetCompilerOptions().GetIncludePatchInformation()) {
-    // Note that ElfWriter::Fixup will be called regardless and therefore
-    // we need to include oat_patches for debug sections unconditionally.
-    builder->WritePatches(".text.oat_patches", &oat_writer->GetAbsolutePatchLocations());
-  }
-
-  builder->End();
-
-  return builder->Good() && output_stream->Flush();
 }
 
 template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer) {
-  const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetMethodDebugInfo();
-  bool generated_mapping_symbol = false;
-  auto* strtab = builder->GetStrTab();
-  auto* symtab = builder->GetSymTab();
+void ElfWriterQuick<ElfTypes>::WriteDynamicSection() {
+  builder_->WriteDynamicSection(elf_file_->GetPath());
+}
 
-  if (method_info.empty()) {
-    return;
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::WriteDebugInfo(
+    const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
+  if (compiler_options_->GetGenerateDebugInfo()) {
+    dwarf::WriteDebugInfo(builder_.get(), method_infos, kCFIFormat);
   }
+}
 
-  // Find all addresses (low_pc) which contain deduped methods.
-  // The first instance of method is not marked deduped_, but the rest is.
-  std::unordered_set<uint32_t> deduped_addresses;
-  for (auto it = method_info.begin(); it != method_info.end(); ++it) {
-    if (it->deduped_) {
-      deduped_addresses.insert(it->low_pc_);
-    }
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::WritePatchLocations(
+    const ArrayRef<const uintptr_t>& patch_locations) {
+  // Add relocation section for .text.
+  if (compiler_options_->GetIncludePatchInformation()) {
+    // Note that ElfWriter::Fixup will be called regardless and therefore
+    // we need to include oat_patches for debug sections unconditionally.
+    builder_->WritePatches(".text.oat_patches", patch_locations);
   }
+}
 
-  strtab->Start();
-  strtab->Write("");  // strtab should start with empty string.
-  for (auto it = method_info.begin(); it != method_info.end(); ++it) {
-    if (it->deduped_) {
-      continue;  // Add symbol only for the first instance.
-    }
-    std::string name = PrettyMethod(it->dex_method_index_, *it->dex_file_, true);
-    if (deduped_addresses.find(it->low_pc_) != deduped_addresses.end()) {
-      name += " [DEDUPED]";
-    }
+template <typename ElfTypes>
+bool ElfWriterQuick<ElfTypes>::End() {
+  builder_->End();
 
-    uint32_t low_pc = it->low_pc_;
-    // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
-    low_pc += it->compiled_method_->CodeDelta();
-    symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
-                true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
+  return builder_->Good();
+}
 
-    // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
-    // instructions, so that disassembler tools can correctly disassemble.
-    // Note that even if we generate just a single mapping symbol, ARM's Streamline
-    // requires it to match function symbol.  Just address 0 does not work.
-    if (it->compiled_method_->GetInstructionSet() == kThumb2) {
-      if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
-        symtab->Add(strtab->Write("$t"), builder->GetText(), it->low_pc_ & ~1,
-                    true, 0, STB_LOCAL, STT_NOTYPE);
-        generated_mapping_symbol = true;
-      }
-    }
-  }
-  strtab->End();
-
-  // Symbols are buffered and written after names (because they are smaller).
-  // We could also do two passes in this function to avoid the buffering.
-  symtab->Start();
-  symtab->Write();
-  symtab->End();
+template <typename ElfTypes>
+OutputStream* ElfWriterQuick<ElfTypes>::GetStream() {
+  return builder_->GetStream();
 }
 
 // Explicit instantiations
diff --git a/compiler/elf_writer_quick.h b/compiler/elf_writer_quick.h
index 83781ab..347d372 100644
--- a/compiler/elf_writer_quick.h
+++ b/compiler/elf_writer_quick.h
@@ -17,46 +17,19 @@
 #ifndef ART_COMPILER_ELF_WRITER_QUICK_H_
 #define ART_COMPILER_ELF_WRITER_QUICK_H_
 
-#include "elf_utils.h"
+#include <memory>
+
+#include "arch/instruction_set.h"
 #include "elf_writer.h"
-#include "oat_writer.h"
+#include "os.h"
 
 namespace art {
 
-template <typename ElfTypes>
-class ElfWriterQuick FINAL : public ElfWriter {
- public:
-  // Write an ELF file. Returns true on success, false on failure.
-  static bool Create(File* file,
-                     OatWriter* oat_writer,
-                     const std::vector<const DexFile*>& dex_files,
-                     const std::string& android_root,
-                     bool is_host,
-                     const CompilerDriver& driver)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+class CompilerOptions;
 
-  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
-                               std::vector<uint8_t>* buffer);
-
- protected:
-  bool Write(OatWriter* oat_writer,
-             const std::vector<const DexFile*>& dex_files,
-             const std::string& android_root,
-             bool is_host)
-      OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
- private:
-  ElfWriterQuick(const CompilerDriver& driver, File* elf_file)
-    : ElfWriter(driver, elf_file) {}
-  ~ElfWriterQuick() {}
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(ElfWriterQuick);
-};
-
-// Explicitly instantiated in elf_writer_quick.cc
-typedef ElfWriterQuick<ElfTypes32> ElfWriterQuick32;
-typedef ElfWriterQuick<ElfTypes64> ElfWriterQuick64;
+std::unique_ptr<ElfWriter> CreateElfWriterQuick(InstructionSet instruction_set,
+                                                const CompilerOptions* compiler_options,
+                                                File* elf_file);
 
 }  // namespace art
 
diff --git a/compiler/elf_writer_test.cc b/compiler/elf_writer_test.cc
index b413a9e..7cf774e 100644
--- a/compiler/elf_writer_test.cc
+++ b/compiler/elf_writer_test.cc
@@ -101,7 +101,8 @@
 
     // Encode patch locations.
     std::vector<uint8_t> oat_patches;
-    ElfBuilder<ElfTypes32>::EncodeOatPatches(patch_locations, &oat_patches);
+    ElfBuilder<ElfTypes32>::EncodeOatPatches(ArrayRef<const uintptr_t>(patch_locations),
+                                             &oat_patches);
 
     // Create buffer to be patched.
     std::vector<uint8_t> initial_data(256);
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 6df1527..5afe2db 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -23,7 +23,9 @@
 #include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
 #include "common_compiler_test.h"
+#include "dwarf/method_debug_info.h"
 #include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "gc/space/image_space.h"
 #include "image_writer.h"
 #include "lock_word.h"
@@ -32,7 +34,6 @@
 #include "scoped_thread_state_change.h"
 #include "signal_catcher.h"
 #include "utils.h"
-#include "vector_output_stream.h"
 
 namespace art {
 
@@ -42,10 +43,17 @@
     ReserveImageSpace();
     CommonCompilerTest::SetUp();
   }
+  void TestWriteRead(ImageHeader::StorageMode storage_mode);
 };
 
-TEST_F(ImageTest, WriteRead) {
-  TEST_DISABLED_FOR_NON_PIC_COMPILING_WITH_OPTIMIZING();
+void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
+  // TODO: Test does not currently work with optimizing.
+  CreateCompilerDriver(Compiler::kQuick, kRuntimeISA);
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  // Enable write for dex2dex.
+  for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
+    dex_file->EnableWrite();
+  }
   // Create a generic location tmp file, to be the base of the .art and .oat temporary files.
   ScratchFile location;
   ScratchFile image_location(location, ".art");
@@ -67,17 +75,14 @@
   std::unique_ptr<ImageWriter> writer(new ImageWriter(*compiler_driver_,
                                                       requested_image_base,
                                                       /*compile_pic*/false,
-                                                      /*compile_app_image*/false));
+                                                      /*compile_app_image*/false,
+                                                      storage_mode));
   // TODO: compile_pic should be a test argument.
   {
     {
       jobject class_loader = nullptr;
-      ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
       TimingLogger timings("ImageTest::WriteRead", false, false);
       TimingLogger::ScopedTiming t("CompileAll", &timings);
-      for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
-        dex_file->EnableWrite();
-      }
       compiler_driver_->SetDexFilesForOatFile(class_linker->GetBootClassPath());
       compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings);
 
@@ -92,12 +97,32 @@
                            /*compiling_boot_image*/true,
                            &timings,
                            &key_value_store);
-      bool success = writer->PrepareImageAddressSpace() &&
-          compiler_driver_->WriteElf(GetTestAndroidRoot(),
-                                     !kIsTargetBuild,
-                                     class_linker->GetBootClassPath(),
-                                     &oat_writer,
-                                     oat_file.GetFile());
+      std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
+          compiler_driver_->GetInstructionSet(),
+          &compiler_driver_->GetCompilerOptions(),
+          oat_file.GetFile());
+      bool success = writer->PrepareImageAddressSpace();
+      ASSERT_TRUE(success);
+
+      elf_writer->Start();
+
+      OutputStream* rodata = elf_writer->StartRoData();
+      bool rodata_ok = oat_writer.WriteRodata(rodata);
+      ASSERT_TRUE(rodata_ok);
+      elf_writer->EndRoData(rodata);
+
+      OutputStream* text = elf_writer->StartText();
+      bool text_ok = oat_writer.WriteCode(text);
+      ASSERT_TRUE(text_ok);
+      elf_writer->EndText(text);
+
+      elf_writer->SetBssSize(oat_writer.GetBssSize());
+      elf_writer->WriteDynamicSection();
+      elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
+      elf_writer->WritePatchLocations(oat_writer.GetAbsolutePatchLocations());
+
+      success = elf_writer->End();
+
       ASSERT_TRUE(success);
     }
   }
@@ -156,7 +181,7 @@
   java_lang_dex_file_ = nullptr;
 
   MemMap::Init();
-  std::unique_ptr<const DexFile> dex(LoadExpectSingleDexFile(GetLibCoreDexFileName().c_str()));
+  std::unique_ptr<const DexFile> dex(LoadExpectSingleDexFile(GetLibCoreDexFileNames()[0].c_str()));
 
   RuntimeOptions options;
   std::string image("-Ximage:");
@@ -183,7 +208,13 @@
 
   gc::space::ImageSpace* image_space = heap->GetBootImageSpace();
   ASSERT_TRUE(image_space != nullptr);
-  ASSERT_LE(image_space->Size(), image_file_size);
+  if (storage_mode == ImageHeader::kStorageModeUncompressed) {
+    // Uncompressed, image should be smaller than file.
+    ASSERT_LE(image_space->Size(), image_file_size);
+  } else {
+    // Compressed, file should be smaller than image.
+    ASSERT_LE(image_file_size, image_space->Size());
+  }
 
   image_space->VerifyImageAllocations();
   uint8_t* image_begin = image_space->Begin();
@@ -211,6 +242,14 @@
   CHECK_EQ(0, rmdir_result);
 }
 
+TEST_F(ImageTest, WriteReadUncompressed) {
+  TestWriteRead(ImageHeader::kStorageModeUncompressed);
+}
+
+TEST_F(ImageTest, WriteReadLZ4) {
+  TestWriteRead(ImageHeader::kStorageModeLZ4);
+}
+
 TEST_F(ImageTest, ImageHeaderIsValid) {
     uint32_t image_begin = ART_BASE_ADDRESS;
     uint32_t image_size_ = 16 * KB;
@@ -231,7 +270,9 @@
                              oat_data_end,
                              oat_file_end,
                              sizeof(void*),
-                             /*compile_pic*/false);
+                             /*compile_pic*/false,
+                             ImageHeader::kDefaultStorageMode,
+                             /*data_size*/0u);
     ASSERT_TRUE(image_header.IsValid());
 
     char* magic = const_cast<char*>(image_header.GetMagic());
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 3d9e7e7..9545c83 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -17,6 +17,7 @@
 #include "image_writer.h"
 
 #include <sys/stat.h>
+#include <lz4.h>
 
 #include <memory>
 #include <numeric>
@@ -225,27 +226,72 @@
     return EXIT_FAILURE;
   }
 
-  // Write out the image + fields + methods.
+  std::unique_ptr<char[]> compressed_data;
+  // Image data size excludes the bitmap and the header.
   ImageHeader* const image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
-  const auto write_count = image_header->GetImageSize();
-  if (!image_file->WriteFully(image_->Begin(), write_count)) {
-    PLOG(ERROR) << "Failed to write image file " << image_filename;
+  const size_t image_data_size = image_header->GetImageSize() - sizeof(ImageHeader);
+  char* image_data = reinterpret_cast<char*>(image_->Begin()) + sizeof(ImageHeader);
+  size_t data_size;
+  const char* image_data_to_write;
+
+  CHECK_EQ(image_header->storage_mode_, image_storage_mode_);
+  switch (image_storage_mode_) {
+    case ImageHeader::kStorageModeLZ4: {
+      size_t compressed_max_size = LZ4_compressBound(image_data_size);
+      compressed_data.reset(new char[compressed_max_size]);
+      data_size = LZ4_compress(
+          reinterpret_cast<char*>(image_->Begin()) + sizeof(ImageHeader),
+          &compressed_data[0],
+          image_data_size);
+      image_data_to_write = &compressed_data[0];
+      VLOG(compiler) << "Compressed from " << image_data_size << " to " << data_size;
+      break;
+    }
+    case ImageHeader::kStorageModeUncompressed: {
+      data_size = image_data_size;
+      image_data_to_write = image_data;
+      break;
+    }
+    default: {
+      LOG(FATAL) << "Unsupported";
+      UNREACHABLE();
+    }
+  }
+
+  // Write header first, as uncompressed.
+  image_header->data_size_ = data_size;
+  if (!image_file->WriteFully(image_->Begin(), sizeof(ImageHeader))) {
+    PLOG(ERROR) << "Failed to write image file header " << image_filename;
     image_file->Erase();
     return false;
   }
 
-  // Write out the image bitmap at the page aligned start of the image end.
+  // Write out the image + fields + methods.
+  const bool is_compressed = compressed_data != nullptr;
+  if (!image_file->WriteFully(image_data_to_write, data_size)) {
+    PLOG(ERROR) << "Failed to write image file data " << image_filename;
+    image_file->Erase();
+    return false;
+  }
+
+  // Write out the image bitmap, page-aligned after the end of the image data, also
+  // uncompressed for convenience.
   const ImageSection& bitmap_section = image_header->GetImageSection(
       ImageHeader::kSectionImageBitmap);
-  CHECK_ALIGNED(bitmap_section.Offset(), kPageSize);
+  // Align up since data size may be unaligned if the image is compressed.
+  size_t bitmap_position_in_file = RoundUp(sizeof(ImageHeader) + data_size, kPageSize);
+  if (!is_compressed) {
+    CHECK_EQ(bitmap_position_in_file, bitmap_section.Offset());
+  }
   if (!image_file->Write(reinterpret_cast<char*>(image_bitmap_->Begin()),
-                         bitmap_section.Size(), bitmap_section.Offset())) {
+                         bitmap_section.Size(),
+                         bitmap_position_in_file)) {
     PLOG(ERROR) << "Failed to write image file " << image_filename;
     image_file->Erase();
     return false;
   }
-
-  CHECK_EQ(bitmap_section.End(), static_cast<size_t>(image_file->GetLength()));
+  CHECK_EQ(bitmap_position_in_file + bitmap_section.Size(),
+           static_cast<size_t>(image_file->GetLength()));
   if (image_file->FlushCloseOrErase() != 0) {
     PLOG(ERROR) << "Failed to flush and close image file " << image_filename;
     return false;
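
The matching reader side is not part of this diff. The bounds-checked counterpart of LZ4_compress in the public LZ4 API is LZ4_decompress_safe; a hedged sketch of decompression, assuming the caller recovers the exact uncompressed size from the header (GetImageSize() minus the header, with data_size_ giving the compressed size):

    #include <lz4.h>
    #include <cstddef>

    // Returns true if exactly out_size bytes were reconstructed.
    bool DecompressImageData(const char* in, size_t in_size, char* out, size_t out_size) {
      int written = LZ4_decompress_safe(in, out,
                                        static_cast<int>(in_size),
                                        static_cast<int>(out_size));
      return written >= 0 && static_cast<size_t>(written) == out_size;
    }
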
@@ -330,10 +376,20 @@
 }
 
 void ImageWriter::PrepareDexCacheArraySlots() {
+  // Prepare dex cache array starts based on the ordering specified in the CompilerDriver.
+  uint32_t size = 0u;
+  for (const DexFile* dex_file : compiler_driver_.GetDexFilesForOatFile()) {
+    dex_cache_array_starts_.Put(dex_file, size);
+    DexCacheArraysLayout layout(target_ptr_size_, dex_file);
+    size += layout.Size();
+  }
+  // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned()
+  // when AssignImageBinSlot() assigns their indexes out of order.
+  bin_slot_sizes_[kBinDexCacheArray] = size;
+
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   Thread* const self = Thread::Current();
   ReaderMutexLock mu(self, *class_linker->DexLock());
-  uint32_t size = 0u;
   for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
     mirror::DexCache* dex_cache =
         down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
@@ -341,22 +397,18 @@
       continue;
     }
     const DexFile* dex_file = dex_cache->GetDexFile();
-    dex_cache_array_starts_.Put(dex_file, size);
     DexCacheArraysLayout layout(target_ptr_size_, dex_file);
     DCHECK(layout.Valid());
+    uint32_t start = dex_cache_array_starts_.Get(dex_file);
     DCHECK_EQ(dex_file->NumTypeIds() != 0u, dex_cache->GetResolvedTypes() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetResolvedTypes(), size + layout.TypesOffset());
+    AddDexCacheArrayRelocation(dex_cache->GetResolvedTypes(), start + layout.TypesOffset());
     DCHECK_EQ(dex_file->NumMethodIds() != 0u, dex_cache->GetResolvedMethods() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetResolvedMethods(), size + layout.MethodsOffset());
+    AddDexCacheArrayRelocation(dex_cache->GetResolvedMethods(), start + layout.MethodsOffset());
     DCHECK_EQ(dex_file->NumFieldIds() != 0u, dex_cache->GetResolvedFields() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetResolvedFields(), size + layout.FieldsOffset());
+    AddDexCacheArrayRelocation(dex_cache->GetResolvedFields(), start + layout.FieldsOffset());
     DCHECK_EQ(dex_file->NumStringIds() != 0u, dex_cache->GetStrings() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetStrings(), size + layout.StringsOffset());
-    size += layout.Size();
+    AddDexCacheArrayRelocation(dex_cache->GetStrings(), start + layout.StringsOffset());
   }
-  // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned()
-  // when AssignImageBinSlot() assigns their indexes out or order.
-  bin_slot_sizes_[kBinDexCacheArray] = size;
 }
 
 void ImageWriter::AddDexCacheArrayRelocation(void* array, size_t offset) {
@@ -534,7 +586,10 @@
 }
 
 bool ImageWriter::AllocMemory() {
-  const size_t length = RoundUp(image_objects_offset_begin_ + GetBinSizeSum() + intern_table_bytes_,
+  const size_t length = RoundUp(image_objects_offset_begin_ +
+                                    GetBinSizeSum() +
+                                    intern_table_bytes_ +
+                                    class_table_bytes_,
                                 kPageSize);
   std::string error_msg;
   image_.reset(MemMap::MapAnonymous("image writer image",
@@ -586,6 +641,17 @@
 }
 
 bool ImageWriter::ContainsBootClassLoaderNonImageClass(mirror::Class* klass) {
+  bool early_exit = false;
+  std::unordered_set<mirror::Class*> visited;
+  return ContainsBootClassLoaderNonImageClassInternal(klass, &early_exit, &visited);
+}
+
+bool ImageWriter::ContainsBootClassLoaderNonImageClassInternal(
+    mirror::Class* klass,
+    bool* early_exit,
+    std::unordered_set<mirror::Class*>* visited) {
+  DCHECK(early_exit != nullptr);
+  DCHECK(visited != nullptr);
   if (klass == nullptr) {
     return false;
   }
@@ -594,14 +660,22 @@
     // Already computed, return the found value.
     return found->second;
   }
-  // Place holder value to prevent infinite recursion.
-  prune_class_memo_.emplace(klass, false);
+  // Circular dependency: return false, but do not store the result in the memoization table.
+  if (visited->find(klass) != visited->end()) {
+    *early_exit = true;
+    return false;
+  }
+  visited->emplace(klass);
   bool result = IsBootClassLoaderNonImageClass(klass);
+  bool my_early_exit = false;  // Only for ourselves, ignore caller.
   if (!result) {
     // Check interfaces since these won't be visited through VisitReferences.
     mirror::IfTable* if_table = klass->GetIfTable();
     for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) {
-      result = result || ContainsBootClassLoaderNonImageClass(if_table->GetInterface(i));
+      result = result || ContainsBootClassLoaderNonImageClassInternal(
+          if_table->GetInterface(i),
+          &my_early_exit,
+          visited);
     }
   }
   // Check static fields and their classes.
@@ -615,16 +689,38 @@
       mirror::Object* ref = klass->GetFieldObject<mirror::Object>(field_offset);
       if (ref != nullptr) {
         if (ref->IsClass()) {
-          result = result || ContainsBootClassLoaderNonImageClass(ref->AsClass());
+          result = result ||
+                   ContainsBootClassLoaderNonImageClassInternal(
+                       ref->AsClass(),
+                       &my_early_exit,
+                       visited);
         }
-        result = result || ContainsBootClassLoaderNonImageClass(ref->GetClass());
+        result = result ||
+                 ContainsBootClassLoaderNonImageClassInternal(
+                     ref->GetClass(),
+                     &my_early_exit,
+                     visited);
       }
       field_offset = MemberOffset(field_offset.Uint32Value() +
                                   sizeof(mirror::HeapReference<mirror::Object>));
     }
   }
-  result = result || ContainsBootClassLoaderNonImageClass(klass->GetSuperClass());
-  prune_class_memo_[klass] = result;
+  result = result ||
+           ContainsBootClassLoaderNonImageClassInternal(
+               klass->GetSuperClass(),
+               &my_early_exit,
+               visited);
+  // Erase the element we stored earlier since we are exiting the function.
+  auto it = visited->find(klass);
+  DCHECK(it != visited->end());
+  visited->erase(it);
+  // Only store the result if it is true or if none of the calls early-exited due to circular
+  // dependencies. If visited is empty, we are the root caller; in that case any cycle was in
+  // a child call and we can safely remember the result.
+  if (result == true || !my_early_exit || visited->empty()) {
+    prune_class_memo_[klass] = result;
+  }
+  *early_exit |= my_early_exit;
   return result;
 }
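
The rewrite above replaces the old false-placeholder trick with an explicit visited set plus an early-exit flag: a computed value is memoized only when it is true, when no call in the subtree hit a cycle, or when we are back at the root so the cycle was fully contained. A minimal sketch of the same pattern on a generic dependency graph (all names here are hypothetical, not ART code):

#include <unordered_map>
#include <unordered_set>
#include <vector>

struct Node {
  bool bad = false;            // Stands in for IsBootClassLoaderNonImageClass().
  std::vector<Node*> deps;     // Stands in for interfaces, field classes, super class.
};

bool ContainsBad(Node* node,
                 bool* early_exit,
                 std::unordered_set<Node*>* visited,
                 std::unordered_map<Node*, bool>* memo) {
  auto found = memo->find(node);
  if (found != memo->end()) {
    return found->second;      // Already computed and safe to reuse.
  }
  if (visited->count(node) != 0) {
    *early_exit = true;        // Cycle: this answer is provisional, do not memoize.
    return false;
  }
  visited->insert(node);
  bool result = node->bad;
  bool my_early_exit = false;  // Cycles seen in our own subtree only.
  for (Node* dep : node->deps) {
    result = result || ContainsBad(dep, &my_early_exit, visited, memo);
  }
  visited->erase(node);
  // Memoize if the answer is definite, or if we are the root caller (visited
  // now empty) so any cycle was fully contained in a child call.
  if (result || !my_early_exit || visited->empty()) {
    (*memo)[node] = result;
  }
  *early_exit |= my_early_exit;
  return result;
}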
 
@@ -934,44 +1030,42 @@
         }
       }
       // Visit and assign offsets for methods.
-      LengthPrefixedArray<ArtMethod>* method_arrays[] = {
-          as_klass->GetDirectMethodsPtr(), as_klass->GetVirtualMethodsPtr(),
-      };
-      for (LengthPrefixedArray<ArtMethod>* array : method_arrays) {
-        if (array == nullptr) {
-          continue;
-        }
+      size_t num_methods = as_klass->NumMethods();
+      if (num_methods != 0) {
         bool any_dirty = false;
-        size_t count = 0;
-        const size_t method_alignment = ArtMethod::Alignment(target_ptr_size_);
-        const size_t method_size = ArtMethod::Size(target_ptr_size_);
-        auto iteration_range =
-            MakeIterationRangeFromLengthPrefixedArray(array, method_size, method_alignment);
-        for (auto& m : iteration_range) {
-          any_dirty = any_dirty || WillMethodBeDirty(&m);
-          ++count;
+        for (auto& m : as_klass->GetMethods(target_ptr_size_)) {
+          if (WillMethodBeDirty(&m)) {
+            any_dirty = true;
+            break;
+          }
         }
         NativeObjectRelocationType type = any_dirty
             ? kNativeObjectRelocationTypeArtMethodDirty
             : kNativeObjectRelocationTypeArtMethodClean;
         Bin bin_type = BinTypeForNativeRelocationType(type);
         // Forward the entire array at once, but header first.
+        const size_t method_alignment = ArtMethod::Alignment(target_ptr_size_);
+        const size_t method_size = ArtMethod::Size(target_ptr_size_);
         const size_t header_size = LengthPrefixedArray<ArtMethod>::ComputeSize(0,
                                                                                method_size,
                                                                                method_alignment);
+        LengthPrefixedArray<ArtMethod>* array = as_klass->GetMethodsPtr();
         auto it = native_object_relocations_.find(array);
-        CHECK(it == native_object_relocations_.end()) << "Method array " << array
-            << " already forwarded";
+        CHECK(it == native_object_relocations_.end())
+            << "Method array " << array << " already forwarded";
         size_t& offset = bin_slot_sizes_[bin_type];
         DCHECK(!IsInBootImage(array));
-        native_object_relocations_.emplace(array, NativeObjectRelocation { offset,
-            any_dirty ? kNativeObjectRelocationTypeArtMethodArrayDirty :
-                kNativeObjectRelocationTypeArtMethodArrayClean });
+        native_object_relocations_.emplace(
+            array, NativeObjectRelocation {
+              offset,
+              any_dirty ? kNativeObjectRelocationTypeArtMethodArrayDirty
+                        : kNativeObjectRelocationTypeArtMethodArrayClean
+            });
         offset += header_size;
-        for (auto& m : iteration_range) {
+        for (auto& m : as_klass->GetMethods(target_ptr_size_)) {
           AssignMethodOffset(&m, type);
         }
-        (any_dirty ? dirty_methods_ : clean_methods_) += count;
+        (any_dirty ? dirty_methods_ : clean_methods_) += num_methods;
       }
     } else if (h_obj->IsObjectArray()) {
       // Walk elements of an object array.
@@ -983,6 +1077,14 @@
           WalkFieldsInOrder(value);
         }
       }
+    } else if (h_obj->IsClassLoader()) {
+      // Register the class loader if it has a class table.
+      // The fake boot class loader should not get registered and we should end up with only one
+      // class loader.
+      mirror::ClassLoader* class_loader = h_obj->AsClassLoader();
+      if (class_loader->GetClassTable() != nullptr) {
+        class_loaders_.insert(class_loader);
+      }
     }
   }
 }
@@ -1107,10 +1209,29 @@
   }
 
   // Calculate how big the intern table will be after being serialized.
-  auto* const intern_table = Runtime::Current()->GetInternTable();
+  InternTable* const intern_table = runtime->GetInternTable();
   CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings";
   intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
 
+  // Write out the class table.
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  if (boot_image_space_ == nullptr) {
+    // Compiling the boot image, add null class loader.
+    class_loaders_.insert(nullptr);
+  }
+  // class_loaders_ usually will not be empty, but may be empty if we attempt to create an image
+  // with no classes.
+  if (class_loaders_.size() == 1u) {
+    // Only write the class table if we have exactly one class loader. There may be cases where
+    // there are multiple class loaders if a class path is passed to dex2oat.
+    ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+    for (mirror::ClassLoader* loader : class_loaders_) {
+      ClassTable* table = class_linker->ClassTableForClassLoader(loader);
+      CHECK(table != nullptr);
+      class_table_bytes_ += table->WriteToMemory(nullptr);
+    }
+  }
+
   // Note that image_end_ is left at end of used mirror object section.
 }
 
@@ -1152,6 +1273,14 @@
   auto* interned_strings_section = &sections[ImageHeader::kSectionInternedStrings];
   *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_);
   cur_pos = interned_strings_section->End();
+  // Round up to the alignment the class table expects. See HashSet::WriteToMemory.
+  cur_pos = RoundUp(cur_pos, sizeof(uint64_t));
+  // Calculate the size of the class table section.
+  auto* class_table_section = &sections[ImageHeader::kSectionClassTable];
+  *class_table_section = ImageSection(cur_pos, class_table_bytes_);
+  cur_pos = class_table_section->End();
+  // Image end goes right before the start of the image bitmap.
+  const size_t image_end = static_cast<uint32_t>(cur_pos);
   // Finally bitmap section.
   const size_t bitmap_bytes = image_bitmap_->Size();
   auto* bitmap_section = &sections[ImageHeader::kSectionImageBitmap];
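
Both the intern table and the class table use the same two-pass scheme visible above: WriteToMemory(nullptr) is called first to measure the serialized size, the layout pass reserves an aligned slot of that size, and the actual write happens later at the reserved offset. The idiom in isolation (a sketch; the WriteToMemory(uint8_t*) size-or-write contract is taken from this diff):

#include <cstddef>
#include <cstdint>

inline size_t RoundUpTo(size_t x, size_t n) {  // n must be a power of two.
  return (x + n - 1) & ~(n - 1);
}

// Pass 1: measure and reserve. Pass 2 (elsewhere): write into image memory
// at *offset and check the returned size against the reservation.
template <typename Table>
size_t ReserveTableSection(Table* table, size_t cur_pos, size_t alignment,
                           size_t* offset) {
  cur_pos = RoundUpTo(cur_pos, alignment);  // sizeof(uint64_t) for class tables.
  *offset = cur_pos;
  return cur_pos + table->WriteToMemory(nullptr);  // nullptr: size query only.
}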
@@ -1165,10 +1294,10 @@
     }
     LOG(INFO) << "Methods: clean=" << clean_methods_ << " dirty=" << dirty_methods_;
   }
-  const size_t image_end = static_cast<uint32_t>(interned_strings_section->End());
   CHECK_EQ(AlignUp(image_begin_ + image_end, kPageSize), oat_file_begin) <<
       "Oat file should be right after the image.";
-  // Create the header.
+  // Create the header. Leave the data size as 0; it is filled in while the
+  // image is being written.
   new (image_->Begin()) ImageHeader(PointerToLowMemUInt32(image_begin_),
                                                           image_end,
                                                           sections,
@@ -1179,7 +1308,9 @@
                                                           PointerToLowMemUInt32(oat_data_end),
                                                           PointerToLowMemUInt32(oat_file_end),
                                                           target_ptr_size_,
-                                                          compile_pic_);
+                                                          compile_pic_,
+                                                          image_storage_mode_,
+                                                          /*data_size*/0u);
 }
 
 ArtMethod* ImageWriter::GetImageMethodAddress(ArtMethod* method) {
@@ -1276,23 +1407,52 @@
     }
     image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), method);
   }
+  FixupRootVisitor root_visitor(this);
+
   // Write the intern table into the image.
   const ImageSection& intern_table_section = image_header->GetImageSection(
       ImageHeader::kSectionInternedStrings);
-  InternTable* const intern_table = Runtime::Current()->GetInternTable();
-  uint8_t* const memory_ptr = image_->Begin() + intern_table_section.Offset();
-  const size_t intern_table_bytes = intern_table->WriteToMemory(memory_ptr);
+  Runtime* const runtime = Runtime::Current();
+  InternTable* const intern_table = runtime->GetInternTable();
+  uint8_t* const intern_table_memory_ptr = image_->Begin() + intern_table_section.Offset();
+  const size_t intern_table_bytes = intern_table->WriteToMemory(intern_table_memory_ptr);
+  CHECK_EQ(intern_table_bytes, intern_table_bytes_);
   // Fixup the pointers in the newly written intern table to contain image addresses.
-  InternTable temp_table;
+  InternTable temp_intern_table;
   // Note that we require that ReadFromMemory does not make an internal copy of the elements so that
   // the VisitRoots() will update the memory directly rather than the copies.
   // This also relies on visit roots not doing any verification which could fail after we update
   // the roots to be the image addresses.
-  temp_table.ReadFromMemory(memory_ptr);
-  CHECK_EQ(temp_table.Size(), intern_table->Size());
-  FixupRootVisitor visitor(this);
-  temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots);
-  CHECK_EQ(intern_table_bytes, intern_table_bytes_);
+  temp_intern_table.ReadFromMemory(intern_table_memory_ptr);
+  CHECK_EQ(temp_intern_table.Size(), intern_table->Size());
+  temp_intern_table.VisitRoots(&root_visitor, kVisitRootFlagAllRoots);
+
+  // Write the class table(s) into the image. class_table_bytes_ may be 0 if there are multiple
+  // class loaders. Writing multiple class tables into the image is currently unsupported.
+  if (class_table_bytes_ > 0u) {
+    ClassLinker* const class_linker = runtime->GetClassLinker();
+    const ImageSection& class_table_section = image_header->GetImageSection(
+        ImageHeader::kSectionClassTable);
+    uint8_t* const class_table_memory_ptr = image_->Begin() + class_table_section.Offset();
+    ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+    size_t class_table_bytes = 0;
+    for (mirror::ClassLoader* loader : class_loaders_) {
+      ClassTable* table = class_linker->ClassTableForClassLoader(loader);
+      CHECK(table != nullptr);
+      uint8_t* memory_ptr = class_table_memory_ptr + class_table_bytes;
+      class_table_bytes += table->WriteToMemory(memory_ptr);
+      // Fixup the pointers in the newly written class table to contain image addresses. See
+      // above comment for intern tables.
+      ClassTable temp_class_table;
+      temp_class_table.ReadFromMemory(memory_ptr);
+      CHECK_EQ(temp_class_table.NumZygoteClasses(), table->NumNonZygoteClasses() +
+               table->NumZygoteClasses());
+      BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor,
+                                                                      RootInfo(kRootUnknown));
+      temp_class_table.VisitRoots(buffered_visitor);
+    }
+    CHECK_EQ(class_table_bytes, class_table_bytes_);
+  }
 }
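
The fix-up above works because ReadFromMemory() is required to alias the just-written buffer rather than copy it, so visiting the temporary table's roots rewrites the serialized bytes in place. Schematically (a sketch under that same aliasing assumption; Table is a stand-in for InternTable or ClassTable):

#include <cstddef>
#include <cstdint>

template <typename Table, typename Visitor>
size_t WriteAndRelocate(Table* source, uint8_t* memory, Visitor* visitor) {
  size_t bytes = source->WriteToMemory(memory);  // Serialize runtime addresses.
  Table temp;
  temp.ReadFromMemory(memory);  // Must alias `memory`, not copy it.
  temp.VisitRoots(visitor);     // Rewrite each root to its image address.
  return bytes;                 // Caller checks this against the reserved size.
}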
 
 void ImageWriter::CopyAndFixupObjects() {
@@ -1506,8 +1666,7 @@
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
       if (klass == class_linker->GetClassRoot(ClassLinker::kJavaLangDexCache)) {
         FixupDexCache(down_cast<mirror::DexCache*>(orig), down_cast<mirror::DexCache*>(copy));
-      } else if (klass->IsSubClass(down_cast<mirror::Class*>(
-          class_linker->GetClassRoot(ClassLinker::kJavaLangClassLoader)))) {
+      } else if (klass->IsClassLoaderClass()) {
         // If src is a ClassLoader, set the class table to null so that it gets recreated by the
         // ClassLoader.
         down_cast<mirror::ClassLoader*>(copy)->SetClassTable(nullptr);
@@ -1793,7 +1952,8 @@
                                 bin_slot_sizes_[kBinArtMethodDirty] +
                                 bin_slot_sizes_[kBinArtMethodClean] +
                                 bin_slot_sizes_[kBinDexCacheArray] +
-                                intern_table_bytes_;
+                                intern_table_bytes_ +
+                                class_table_bytes_;
   return image_begin_ + RoundUp(image_end_ + native_sections_size, kPageSize);
 }
 
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 22cb91a..f1b2965 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -27,10 +27,11 @@
 #include <ostream>
 
 #include "base/bit_utils.h"
+#include "base/length_prefixed_array.h"
 #include "base/macros.h"
 #include "driver/compiler_driver.h"
 #include "gc/space/space.h"
-#include "length_prefixed_array.h"
+#include "image.h"
 #include "lock_word.h"
 #include "mem_map.h"
 #include "oat_file.h"
@@ -54,7 +55,8 @@
   ImageWriter(const CompilerDriver& compiler_driver,
               uintptr_t image_begin,
               bool compile_pic,
-              bool compile_app_image)
+              bool compile_app_image,
+              ImageHeader::StorageMode image_storage_mode)
       : compiler_driver_(compiler_driver),
         image_begin_(reinterpret_cast<uint8_t*>(image_begin)),
         image_end_(0),
@@ -72,7 +74,9 @@
         intern_table_bytes_(0u),
         image_method_array_(ImageHeader::kImageMethodsCount),
         dirty_methods_(0u),
-        clean_methods_(0u) {
+        clean_methods_(0u),
+        class_table_bytes_(0u),
+        image_storage_mode_(image_storage_mode) {
     CHECK_NE(image_begin, 0U);
     std::fill_n(image_methods_, arraysize(image_methods_), nullptr);
     std::fill_n(oat_address_offsets_, arraysize(oat_address_offsets_), 0);
@@ -343,6 +347,12 @@
   bool ContainsBootClassLoaderNonImageClass(mirror::Class* klass)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Sets *early_exit to true if a cyclic dependency was found anywhere down the chain.
+  bool ContainsBootClassLoaderNonImageClassInternal(mirror::Class* klass,
+                                                    bool* early_exit,
+                                                    std::unordered_set<mirror::Class*>* visited)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   static Bin BinTypeForNativeRelocationType(NativeObjectRelocationType type);
 
   uintptr_t NativeOffsetInImage(void* obj);
@@ -447,6 +457,17 @@
   // Prune class memoization table to speed up ContainsBootClassLoaderNonImageClass.
   std::unordered_map<mirror::Class*, bool> prune_class_memo_;
 
+  // Class loaders with a class table to write out. There should only be one class loader because
+  // dex2oat loads the dex files to be compiled into a single class loader. For the boot image,
+  // null is a valid entry.
+  std::unordered_set<mirror::ClassLoader*> class_loaders_;
+
+  // Number of image class table bytes.
+  size_t class_table_bytes_;
+
+  // Which mode the image is stored as; see image.h.
+  const ImageHeader::StorageMode image_storage_mode_;
+
   friend class ContainsBootClassLoaderNonImageClassVisitor;
   friend class FixupClassVisitor;
   friend class FixupRootVisitor;
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 2125c9a..d001495 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -170,18 +170,6 @@
   self->AssertNoPendingException();
   Runtime* runtime = Runtime::Current();
 
-  // Check if the method is already compiled.
-  if (runtime->GetJit()->GetCodeCache()->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
-    VLOG(jit) << "Already compiled " << PrettyMethod(method);
-    return true;
-  }
-
-  // Don't compile the method if we are supposed to be deoptimized.
-  instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
-  if (instrumentation->AreAllMethodsDeoptimized() || instrumentation->IsDeoptimized(method)) {
-    return false;
-  }
-
   // Ensure the class is initialized.
   Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
   if (!runtime->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
@@ -190,13 +178,13 @@
   }
 
   // Do the compilation.
-  JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
   bool success = false;
   {
     TimingLogger::ScopedTiming t2("Compiling", &logger);
     // If we get a request to compile a proxy method, we pass the actual Java method
     // of that proxy method, as the compiler does not expect a proxy method.
     ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
+    JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
     success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile);
   }
 
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index f3bda2f..8d60be2 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -219,7 +219,9 @@
   // calling through stub will link with &Java_MyClassNatives_bar
 
   std::string reason;
-  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->LoadNativeLibrary(env_, "", class_loader_, &reason))
+  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->
+                  LoadNativeLibrary(env_, "", class_loader_, /* is_shared_namespace */ false,
+                                    nullptr, nullptr, &reason))
       << reason;
 
   jint result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 24);
@@ -233,7 +235,9 @@
   // calling through stub will link with &Java_MyClassNatives_sbar
 
   std::string reason;
-  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->LoadNativeLibrary(env_, "", class_loader_, &reason))
+  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->
+                  LoadNativeLibrary(env_, "", class_loader_, /* is_shared_namespace */ false,
+                                    nullptr, nullptr, &reason))
       << reason;
 
   jint result = env_->CallStaticIntMethod(jklass_, jmethod_, 42);
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index 13754fd..73b0fac 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -17,9 +17,9 @@
 #include "linker/arm/relative_patcher_arm_base.h"
 
 #include "compiled_method.h"
+#include "linker/output_stream.h"
 #include "oat.h"
 #include "oat_quick_method_header.h"
-#include "output_stream.h"
 
 namespace art {
 namespace linker {
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 57018af..3d4c218 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -20,10 +20,10 @@
 #include "art_method.h"
 #include "compiled_method.h"
 #include "driver/compiler_driver.h"
-#include "utils/arm64/assembler_arm64.h"
+#include "linker/output_stream.h"
 #include "oat.h"
 #include "oat_quick_method_header.h"
-#include "output_stream.h"
+#include "utils/arm64/assembler_arm64.h"
 
 namespace art {
 namespace linker {
diff --git a/compiler/buffered_output_stream.cc b/compiler/linker/buffered_output_stream.cc
similarity index 76%
rename from compiler/buffered_output_stream.cc
rename to compiler/linker/buffered_output_stream.cc
index 3ca518b..4c66c76 100644
--- a/compiler/buffered_output_stream.cc
+++ b/compiler/linker/buffered_output_stream.cc
@@ -20,18 +20,24 @@
 
 namespace art {
 
-BufferedOutputStream::BufferedOutputStream(OutputStream* out)
-    : OutputStream(out->GetLocation()), out_(out), used_(0) {}
+BufferedOutputStream::BufferedOutputStream(std::unique_ptr<OutputStream> out)
+    : OutputStream(out->GetLocation()),  // Before out is moved to out_.
+      out_(std::move(out)),
+      used_(0) {}
+
+BufferedOutputStream::~BufferedOutputStream() {
+  FlushBuffer();
+}
 
 bool BufferedOutputStream::WriteFully(const void* buffer, size_t byte_count) {
   if (byte_count > kBufferSize) {
-    if (!Flush()) {
+    if (!FlushBuffer()) {
       return false;
     }
     return out_->WriteFully(buffer, byte_count);
   }
   if (used_ + byte_count > kBufferSize) {
-    if (!Flush()) {
+    if (!FlushBuffer()) {
       return false;
     }
   }
@@ -42,6 +48,10 @@
 }
 
 bool BufferedOutputStream::Flush() {
+  return FlushBuffer() && out_->Flush();
+}
+
+bool BufferedOutputStream::FlushBuffer() {
   bool success = true;
   if (used_ > 0) {
     success = out_->WriteFully(&buffer_[0], used_);
@@ -51,7 +61,7 @@
 }
 
 off_t BufferedOutputStream::Seek(off_t offset, Whence whence) {
-  if (!Flush()) {
+  if (!FlushBuffer()) {
     return -1;
   }
   return out_->Seek(offset, whence);
diff --git a/compiler/buffered_output_stream.h b/compiler/linker/buffered_output_stream.h
similarity index 65%
rename from compiler/buffered_output_stream.h
rename to compiler/linker/buffered_output_stream.h
index b447f41..a2eefbb 100644
--- a/compiler/buffered_output_stream.h
+++ b/compiler/linker/buffered_output_stream.h
@@ -14,8 +14,10 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
-#define ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
+#ifndef ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_
+
+#include <memory>
 
 #include "output_stream.h"
 
@@ -25,26 +27,23 @@
 
 class BufferedOutputStream FINAL : public OutputStream {
  public:
-  explicit BufferedOutputStream(OutputStream* out);
+  explicit BufferedOutputStream(std::unique_ptr<OutputStream> out);
 
-  virtual ~BufferedOutputStream() {
-    Flush();
-    delete out_;
-  }
+  ~BufferedOutputStream() OVERRIDE;
 
-  virtual bool WriteFully(const void* buffer, size_t byte_count);
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE;
 
-  virtual off_t Seek(off_t offset, Whence whence);
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
 
-  bool Flush();
+  bool Flush() OVERRIDE;
 
  private:
   static const size_t kBufferSize = 8 * KB;
 
-  OutputStream* const out_;
+  bool FlushBuffer();
 
+  std::unique_ptr<OutputStream> const out_;
   uint8_t buffer_[kBufferSize];
-
   size_t used_;
 
   DISALLOW_COPY_AND_ASSIGN(BufferedOutputStream);
@@ -52,4 +51,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
+#endif  // ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_
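
Two behavioural changes ride along with this rename: the wrapper now owns its delegate through std::unique_ptr (the manual delete in the old destructor is gone), and the public Flush() is split from the private FlushBuffer() so that flushing also propagates to the wrapped stream while internal buffer drains do not. A hypothetical caller under the new interface:

// Sketch only; assumes the ART headers renamed in this diff.
#include "base/stl_util.h"                    // MakeUnique, as used in the test below.
#include "linker/buffered_output_stream.h"
#include "linker/file_output_stream.h"
#include "os.h"                               // art::File.

bool WriteBlob(art::File* file, const void* data, size_t size) {
  // BufferedOutputStream takes ownership of the FileOutputStream.
  art::BufferedOutputStream out(art::MakeUnique<art::FileOutputStream>(file));
  if (!out.WriteFully(data, size)) {
    return false;
  }
  // Flush() drains the 8 KiB buffer and then forwards Flush() to the file;
  // the destructor only drains the buffer (FlushBuffer()).
  return out.Flush();
}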
diff --git a/compiler/linker/error_delaying_output_stream.h b/compiler/linker/error_delaying_output_stream.h
new file mode 100644
index 0000000..99410e4
--- /dev/null
+++ b/compiler/linker/error_delaying_output_stream.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_
+
+#include "output_stream.h"
+
+#include "base/logging.h"
+
+namespace art {
+
+// OutputStream wrapper that delays reporting an error until Flush().
+class ErrorDelayingOutputStream FINAL : public OutputStream {
+ public:
+  explicit ErrorDelayingOutputStream(OutputStream* output)
+      : OutputStream(output->GetLocation()),
+        output_(output),
+        output_good_(true),
+        output_offset_(0) { }
+
+  // This function always reports success, to keep calling code simple.
+  // Use Good() to check the actual status of the output stream.
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
+    if (output_good_) {
+      if (!output_->WriteFully(buffer, byte_count)) {
+        PLOG(ERROR) << "Failed to write " << byte_count
+                    << " bytes to " << GetLocation() << " at offset " << output_offset_;
+        output_good_ = false;
+      }
+    }
+    output_offset_ += byte_count;
+    return true;
+  }
+
+  // This function always reports success, to keep calling code simple.
+  // Use Good() to check the actual status of the output stream.
+  off_t Seek(off_t offset, Whence whence) OVERRIDE {
+    // We keep a shadow copy of the offset so that we return
+    // the expected value even if the output stream failed.
+    off_t new_offset;
+    switch (whence) {
+      case kSeekSet:
+        new_offset = offset;
+        break;
+      case kSeekCurrent:
+        new_offset = output_offset_ + offset;
+        break;
+      default:
+        LOG(FATAL) << "Unsupported seek type: " << whence;
+        UNREACHABLE();
+    }
+    if (output_good_) {
+      off_t actual_offset = output_->Seek(offset, whence);
+      if (actual_offset == static_cast<off_t>(-1)) {
+        PLOG(ERROR) << "Failed to seek in " << GetLocation() << ". Offset=" << offset
+                    << " whence=" << whence << " new_offset=" << new_offset;
+        output_good_ = false;
+      }
+      DCHECK_EQ(actual_offset, new_offset);
+    }
+    output_offset_ = new_offset;
+    return new_offset;
+  }
+
+  // Flush the output and return whether all operations have succeeded.
+  // Do nothing if we already have a pending error.
+  bool Flush() OVERRIDE {
+    if (output_good_) {
+      output_good_ = output_->Flush();
+    }
+    return output_good_;
+  }
+
+  // Check (without flushing) whether all operations have succeeded so far.
+  bool Good() const {
+    return output_good_;
+  }
+
+ private:
+  OutputStream* output_;
+  bool output_good_;  // True if all writes to output succeeded.
+  off_t output_offset_;  // Keep track of the current position in the stream.
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_
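
The payoff of delaying errors is that a long sequence of writes and seeks needs no per-call checks: the first failure is logged with its offset, later calls stop touching the underlying stream, and the caller tests the outcome once. A hypothetical caller:

// Sketch only; ErrorDelayingOutputStream is the class added above.
bool WriteTwoChunksAt(art::OutputStream* raw,
                      const void* a, const void* b, size_t n, off_t pos) {
  art::ErrorDelayingOutputStream out(raw);
  out.Seek(pos, art::kSeekSet);  // Shadow offset stays valid even on failure.
  out.WriteFully(a, n);          // Always returns true; errors are latched.
  out.WriteFully(b, n);
  return out.Flush();            // Reports any pending error exactly once.
}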
diff --git a/compiler/file_output_stream.cc b/compiler/linker/file_output_stream.cc
similarity index 94%
rename from compiler/file_output_stream.cc
rename to compiler/linker/file_output_stream.cc
index 3ee16f5..bbfbdfd 100644
--- a/compiler/file_output_stream.cc
+++ b/compiler/linker/file_output_stream.cc
@@ -33,4 +33,8 @@
   return lseek(file_->Fd(), offset, static_cast<int>(whence));
 }
 
+bool FileOutputStream::Flush() {
+  return file_->Flush() == 0;
+}
+
 }  // namespace art
diff --git a/compiler/file_output_stream.h b/compiler/linker/file_output_stream.h
similarity index 72%
rename from compiler/file_output_stream.h
rename to compiler/linker/file_output_stream.h
index 9dfbd7f..f2d8453 100644
--- a/compiler/file_output_stream.h
+++ b/compiler/linker/file_output_stream.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_FILE_OUTPUT_STREAM_H_
-#define ART_COMPILER_FILE_OUTPUT_STREAM_H_
+#ifndef ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_
 
 #include "output_stream.h"
 
@@ -27,11 +27,13 @@
  public:
   explicit FileOutputStream(File* file);
 
-  virtual ~FileOutputStream() {}
+  ~FileOutputStream() OVERRIDE {}
 
-  virtual bool WriteFully(const void* buffer, size_t byte_count);
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE;
 
-  virtual off_t Seek(off_t offset, Whence whence);
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
+
+  bool Flush() OVERRIDE;
 
  private:
   File* const file_;
@@ -41,4 +43,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_FILE_OUTPUT_STREAM_H_
+#endif  // ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_
diff --git a/compiler/output_stream.cc b/compiler/linker/output_stream.cc
similarity index 100%
rename from compiler/output_stream.cc
rename to compiler/linker/output_stream.cc
diff --git a/compiler/output_stream.h b/compiler/linker/output_stream.h
similarity index 76%
rename from compiler/output_stream.h
rename to compiler/linker/output_stream.h
index 4d30b83..96a5f48 100644
--- a/compiler/output_stream.h
+++ b/compiler/linker/output_stream.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_OUTPUT_STREAM_H_
-#define ART_COMPILER_OUTPUT_STREAM_H_
+#ifndef ART_COMPILER_LINKER_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_OUTPUT_STREAM_H_
 
 #include <ostream>
 #include <string>
@@ -45,6 +45,14 @@
 
   virtual off_t Seek(off_t offset, Whence whence) = 0;
 
+  /*
+   * Flushes the stream. Returns whether the operation was successful.
+   *
+   * An OutputStream may delay reporting errors from WriteFully() or
+   * Seek(). In that case, Flush() shall report any pending error.
+   */
+  virtual bool Flush() = 0;
+
  private:
   const std::string location_;
 
@@ -53,4 +61,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_OUTPUT_STREAM_H_
+#endif  // ART_COMPILER_LINKER_OUTPUT_STREAM_H_
diff --git a/compiler/output_stream_test.cc b/compiler/linker/output_stream_test.cc
similarity index 71%
rename from compiler/output_stream_test.cc
rename to compiler/linker/output_stream_test.cc
index 6104ccd..84c76f2 100644
--- a/compiler/output_stream_test.cc
+++ b/compiler/linker/output_stream_test.cc
@@ -19,6 +19,7 @@
 
 #include "base/unix_file/fd_file.h"
 #include "base/logging.h"
+#include "base/stl_util.h"
 #include "buffered_output_stream.h"
 #include "common_runtime_test.h"
 
@@ -48,6 +49,7 @@
     EXPECT_TRUE(output_stream_->WriteFully(buf, 4));
     CheckOffset(10);
     EXPECT_TRUE(output_stream_->WriteFully(buf, 6));
+    EXPECT_TRUE(output_stream_->Flush());
   }
 
   void CheckTestOutput(const std::vector<uint8_t>& actual) {
@@ -77,9 +79,7 @@
 TEST_F(OutputStreamTest, Buffered) {
   ScratchFile tmp;
   {
-    std::unique_ptr<FileOutputStream> file_output_stream(new FileOutputStream(tmp.GetFile()));
-    CHECK(file_output_stream.get() != nullptr);
-    BufferedOutputStream buffered_output_stream(file_output_stream.release());
+    BufferedOutputStream buffered_output_stream(MakeUnique<FileOutputStream>(tmp.GetFile()));
     SetOutputStream(buffered_output_stream);
     GenerateTestOutput();
   }
@@ -99,4 +99,39 @@
   CheckTestOutput(output);
 }
 
+TEST_F(OutputStreamTest, BufferedFlush) {
+  struct CheckingOutputStream : OutputStream {
+    CheckingOutputStream()
+        : OutputStream("dummy"),
+          flush_called(false) { }
+    ~CheckingOutputStream() OVERRIDE {}
+
+    bool WriteFully(const void* buffer ATTRIBUTE_UNUSED,
+                    size_t byte_count ATTRIBUTE_UNUSED) OVERRIDE {
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+    }
+
+    off_t Seek(off_t offset ATTRIBUTE_UNUSED, Whence whence ATTRIBUTE_UNUSED) OVERRIDE {
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+    }
+
+    bool Flush() OVERRIDE {
+      flush_called = true;
+      return true;
+    }
+
+    bool flush_called;
+  };
+
+  std::unique_ptr<CheckingOutputStream> cos = MakeUnique<CheckingOutputStream>();
+  CheckingOutputStream* checking_output_stream = cos.get();
+  BufferedOutputStream buffered(std::move(cos));
+  ASSERT_FALSE(checking_output_stream->flush_called);
+  bool flush_result = buffered.Flush();
+  ASSERT_TRUE(flush_result);
+  ASSERT_TRUE(checking_output_stream->flush_called);
+}
+
 }  // namespace art
diff --git a/compiler/vector_output_stream.cc b/compiler/linker/vector_output_stream.cc
similarity index 94%
rename from compiler/vector_output_stream.cc
rename to compiler/linker/vector_output_stream.cc
index 3d33673..f758005 100644
--- a/compiler/vector_output_stream.cc
+++ b/compiler/linker/vector_output_stream.cc
@@ -21,7 +21,7 @@
 namespace art {
 
 VectorOutputStream::VectorOutputStream(const std::string& location, std::vector<uint8_t>* vector)
-  : OutputStream(location), offset_(vector->size()), vector_(vector) {}
+    : OutputStream(location), offset_(vector->size()), vector_(vector) {}
 
 off_t VectorOutputStream::Seek(off_t offset, Whence whence) {
   CHECK(whence == kSeekSet || whence == kSeekCurrent || whence == kSeekEnd) << whence;
diff --git a/compiler/vector_output_stream.h b/compiler/linker/vector_output_stream.h
similarity index 81%
rename from compiler/vector_output_stream.h
rename to compiler/linker/vector_output_stream.h
index 3c5877c..3210143 100644
--- a/compiler/vector_output_stream.h
+++ b/compiler/linker/vector_output_stream.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_VECTOR_OUTPUT_STREAM_H_
-#define ART_COMPILER_VECTOR_OUTPUT_STREAM_H_
+#ifndef ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_
 
 #include "output_stream.h"
 
@@ -29,9 +29,9 @@
  public:
   VectorOutputStream(const std::string& location, std::vector<uint8_t>* vector);
 
-  virtual ~VectorOutputStream() {}
+  ~VectorOutputStream() OVERRIDE {}
 
-  bool WriteFully(const void* buffer, size_t byte_count) {
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
     if (static_cast<size_t>(offset_) == vector_->size()) {
       const uint8_t* start = reinterpret_cast<const uint8_t*>(buffer);
       vector_->insert(vector_->end(), &start[0], &start[byte_count]);
@@ -45,7 +45,11 @@
     return true;
   }
 
-  off_t Seek(off_t offset, Whence whence);
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
+
+  bool Flush() OVERRIDE {
+    return true;
+  }
 
  private:
   void EnsureCapacity(off_t new_offset) {
@@ -62,4 +66,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_VECTOR_OUTPUT_STREAM_H_
+#endif  // ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 030451c..cd0f0d2 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -27,14 +27,17 @@
 #include "dex/verification_results.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
+#include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "linker/vector_output_stream.h"
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "oat_file-inl.h"
 #include "oat_writer.h"
 #include "scoped_thread_state_change.h"
-#include "vector_output_stream.h"
 
 namespace art {
 
@@ -134,11 +137,31 @@
                          /*compiling_boot_image*/false,
                          &timings,
                          &key_value_store);
-    return compiler_driver_->WriteElf(GetTestAndroidRoot(),
-                                      !kIsTargetBuild,
-                                      dex_files,
-                                      &oat_writer,
-                                      file);
+    std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
+        compiler_driver_->GetInstructionSet(),
+        &compiler_driver_->GetCompilerOptions(),
+        file);
+
+    elf_writer->Start();
+
+    OutputStream* rodata = elf_writer->StartRoData();
+    if (!oat_writer.WriteRodata(rodata)) {
+      return false;
+    }
+    elf_writer->EndRoData(rodata);
+
+    OutputStream* text = elf_writer->StartText();
+    if (!oat_writer.WriteCode(text)) {
+      return false;
+    }
+    elf_writer->EndText(text);
+
+    elf_writer->SetBssSize(oat_writer.GetBssSize());
+    elf_writer->WriteDynamicSection();
+    elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
+    elf_writer->WritePatchLocations(oat_writer.GetAbsolutePatchLocations());
+
+    return elf_writer->End();
   }
 
   std::unique_ptr<const InstructionSetFeatures> insn_features_;
@@ -176,7 +199,7 @@
   ASSERT_TRUE(oat_file.get() != nullptr) << error_msg;
   const OatHeader& oat_header = oat_file->GetOatHeader();
   ASSERT_TRUE(oat_header.IsValid());
-  ASSERT_EQ(1U, oat_header.GetDexFileCount());  // core
+  ASSERT_EQ(class_linker->GetBootClassPath().size(), oat_header.GetDexFileCount());  // core
   ASSERT_EQ(42U, oat_header.GetImageFileLocationOatChecksum());
   ASSERT_EQ(4096U, oat_header.GetImageFileLocationOatDataBegin());
   ASSERT_EQ("lue.art", std::string(oat_header.GetStoreValueByKey(OatHeader::kImageLocationKey)));
@@ -201,8 +224,9 @@
     }
 
     const char* descriptor = dex_file.GetClassDescriptor(class_def);
-    mirror::Class* klass = class_linker->FindClass(soa.Self(), descriptor,
-                                                   NullHandle<mirror::ClassLoader>());
+    mirror::Class* klass = class_linker->FindClass(soa.Self(),
+                                                   descriptor,
+                                                   ScopedNullHandle<mirror::ClassLoader>());
 
     const OatFile::OatClass oat_class = oat_dex_file->GetOatClass(i);
     CHECK_EQ(mirror::Class::Status::kStatusNotReady, oat_class.GetStatus()) << descriptor;
@@ -215,12 +239,12 @@
       ++method_index;
     }
     size_t visited_virtuals = 0;
-    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
-      if (!m.IsMiranda()) {
-        CheckMethod(&m, oat_class.GetOatMethod(method_index), dex_file);
-        ++method_index;
-        ++visited_virtuals;
-      }
+    // TODO: We should also check copied methods in this test.
+    for (auto& m : klass->GetDeclaredVirtualMethods(pointer_size)) {
+      EXPECT_FALSE(m.IsMiranda());
+      CheckMethod(&m, oat_class.GetOatMethod(method_index), dex_file);
+      ++method_index;
+      ++visited_virtuals;
     }
     EXPECT_EQ(visited_virtuals, num_virtual_methods);
   }
@@ -232,7 +256,7 @@
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(28U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(114 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(132 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
@@ -241,14 +265,9 @@
     std::unique_ptr<const InstructionSetFeatures> insn_features(
         InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
     ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
-    std::vector<const DexFile*> dex_files;
-    uint32_t image_file_location_oat_checksum = 0;
-    uint32_t image_file_location_oat_begin = 0;
     std::unique_ptr<OatHeader> oat_header(OatHeader::Create(insn_set,
                                                             insn_features.get(),
-                                                            &dex_files,
-                                                            image_file_location_oat_checksum,
-                                                            image_file_location_oat_begin,
+                                                            0u,
                                                             nullptr));
     ASSERT_NE(oat_header.get(), nullptr);
     ASSERT_TRUE(oat_header->IsValid());
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 40a3f14..53ac77b 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -31,10 +31,12 @@
 #include "dex/verification_results.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space.h"
 #include "handle_scope-inl.h"
 #include "image_writer.h"
+#include "linker/output_stream.h"
 #include "linker/relative_patcher.h"
 #include "mirror/array.h"
 #include "mirror/class_loader.h"
@@ -42,7 +44,6 @@
 #include "mirror/object-inl.h"
 #include "oat_quick_method_header.h"
 #include "os.h"
-#include "output_stream.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
 #include "type_lookup_table.h"
@@ -51,6 +52,97 @@
 
 namespace art {
 
+class OatWriter::OatClass {
+ public:
+  OatClass(size_t offset,
+           const dchecked_vector<CompiledMethod*>& compiled_methods,
+           uint32_t num_non_null_compiled_methods,
+           mirror::Class::Status status);
+  OatClass(OatClass&& src) = default;
+  size_t GetOatMethodOffsetsOffsetFromOatHeader(size_t class_def_method_index_) const;
+  size_t GetOatMethodOffsetsOffsetFromOatClass(size_t class_def_method_index_) const;
+  size_t SizeOf() const;
+  bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const;
+
+  CompiledMethod* GetCompiledMethod(size_t class_def_method_index) const {
+    return compiled_methods_[class_def_method_index];
+  }
+
+  // Offset of start of OatClass from beginning of OatHeader. It is
+  // used to validate file position when writing.
+  size_t offset_;
+
+  // CompiledMethods for each class_def_method_index, or null if no method is available.
+  dchecked_vector<CompiledMethod*> compiled_methods_;
+
+  // Offset from OatClass::offset_ to the OatMethodOffsets for the
+  // class_def_method_index. If 0, it means the corresponding
+  // CompiledMethod entry in OatClass::compiled_methods_ should be
+  // null and that OatClass::type_ should be kOatClassBitmap.
+  dchecked_vector<uint32_t> oat_method_offsets_offsets_from_oat_class_;
+
+  // Data to write.
+
+  static_assert(mirror::Class::Status::kStatusMax < (1 << 16), "class status won't fit in 16 bits");
+  int16_t status_;
+
+  static_assert(OatClassType::kOatClassMax < (1 << 16), "oat_class type won't fit in 16 bits");
+  uint16_t type_;
+
+  uint32_t method_bitmap_size_;
+
+  // Bit vector indexed by ClassDef method index. When
+  // OatClassType::type_ is kOatClassBitmap, a set bit indicates the
+  // method has an OatMethodOffsets in method_offsets_; otherwise
+  // the entry was omitted to save space. If OatClassType::type_ is
+  // not kOatClassBitmap, the bitmap will be null.
+  std::unique_ptr<BitVector> method_bitmap_;
+
+  // OatMethodOffsets and OatMethodHeaders for each CompiledMethod
+  // present in the OatClass. Note that some may be missing if
+  // OatClass::compiled_methods_ contains null values (and
+  // oat_method_offsets_offsets_from_oat_class_ should contain 0
+  // values in this case).
+  dchecked_vector<OatMethodOffsets> method_offsets_;
+  dchecked_vector<OatQuickMethodHeader> method_headers_;
+
+ private:
+  size_t GetMethodOffsetsRawSize() const {
+    return method_offsets_.size() * sizeof(method_offsets_[0]);
+  }
+
+  DISALLOW_COPY_AND_ASSIGN(OatClass);
+};
+
+class OatWriter::OatDexFile {
+ public:
+  OatDexFile(size_t offset, const DexFile& dex_file);
+  OatDexFile(OatDexFile&& src) = default;
+
+  size_t SizeOf() const;
+  bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const;
+
+  // Offset of start of OatDexFile from beginning of OatHeader. It is
+  // used to validate file position when writing.
+  size_t offset_;
+
+  // Data to write.
+  uint32_t dex_file_location_size_;
+  const uint8_t* dex_file_location_data_;
+  uint32_t dex_file_location_checksum_;
+  uint32_t dex_file_offset_;
+  uint32_t lookup_table_offset_;
+  TypeLookupTable* lookup_table_;  // Owned by the dex file.
+  dchecked_vector<uint32_t> class_offsets_;
+
+ private:
+  size_t GetClassOffsetsRawSize() const {
+    return class_offsets_.size() * sizeof(class_offsets_[0]);
+  }
+
+  DISALLOW_COPY_AND_ASSIGN(OatDexFile);
+};
+
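+// Note: moving these definitions into oat_writer.cc pairs with the ownership
+// change below. OatClass and OatDexFile are now held by value in
+// dchecked_vector<>, constructed with emplace_back() and moved via their
+// defaulted move constructors, which is why the destructor loses its
+// STLDeleteElements() calls. The pattern in miniature (std::vector shown;
+// dchecked_vector<> behaves the same with added bounds checks):
+//
+//   #include <cstddef>
+//   #include <vector>
+//
+//   class Element {
+//    public:
+//     explicit Element(size_t offset) : offset_(offset) {}
+//     Element(Element&& src) = default;      // Like OatClass(OatClass&&) = default.
+//     Element(const Element&) = delete;      // Like DISALLOW_COPY_AND_ASSIGN.
+//     Element& operator=(const Element&) = delete;
+//     size_t offset_;
+//   };
+//
+//   void Demo() {
+//     std::vector<Element> elements;
+//     elements.reserve(16);                  // Mirrors the reserve() added below.
+//     elements.emplace_back(/*offset=*/0u);  // Construct in place; no new/delete.
+//   }  // Vector destruction runs ~Element(); no STLDeleteElements() needed.
+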
 #define DCHECK_OFFSET() \
   DCHECK_EQ(static_cast<off_t>(file_offset + relative_offset), out->Seek(0, kSeekCurrent)) \
     << "file_offset=" << file_offset << " relative_offset=" << relative_offset
@@ -105,14 +197,14 @@
     size_oat_dex_file_location_data_(0),
     size_oat_dex_file_location_checksum_(0),
     size_oat_dex_file_offset_(0),
-    size_oat_dex_file_methods_offsets_(0),
+    size_oat_dex_file_lookup_table_offset_(0),
+    size_oat_dex_file_class_offsets_(0),
+    size_oat_lookup_table_alignment_(0),
+    size_oat_lookup_table_(0),
     size_oat_class_type_(0),
     size_oat_class_status_(0),
     size_oat_class_method_bitmaps_(0),
     size_oat_class_method_offsets_(0),
-    size_oat_lookup_table_alignment_(0),
-    size_oat_lookup_table_offset_(0),
-    size_oat_lookup_table_(0),
     method_offset_map_() {
   CHECK(key_value_store != nullptr);
   if (compiling_boot_image) {
@@ -179,9 +271,6 @@
 }
 
 OatWriter::~OatWriter() {
-  delete oat_header_;
-  STLDeleteElements(&oat_dex_files_);
-  STLDeleteElements(&oat_classes_);
 }
 
 struct OatWriter::GcMapDataAccess {
@@ -325,6 +414,11 @@
     : DexMethodVisitor(writer, offset),
       compiled_methods_(),
       num_non_null_compiled_methods_(0u) {
+    size_t num_classes = 0u;
+    for (const OatDexFile& oat_dex_file : writer_->oat_dex_files_) {
+      num_classes += oat_dex_file.class_offsets_.size();
+    }
+    writer_->oat_classes_.reserve(num_classes);
     compiled_methods_.reserve(256u);
   }
 
@@ -363,16 +457,16 @@
       status = mirror::Class::kStatusNotReady;
     }
 
-    OatClass* oat_class = new OatClass(offset_, compiled_methods_,
-                                       num_non_null_compiled_methods_, status);
-    writer_->oat_classes_.push_back(oat_class);
-    oat_class->UpdateChecksum(writer_->oat_header_);
-    offset_ += oat_class->SizeOf();
+    writer_->oat_classes_.emplace_back(offset_,
+                                       compiled_methods_,
+                                       num_non_null_compiled_methods_,
+                                       status);
+    offset_ += writer_->oat_classes_.back().SizeOf();
     return DexMethodVisitor::EndClass();
   }
 
  private:
-  std::vector<CompiledMethod*> compiled_methods_;
+  dchecked_vector<CompiledMethod*> compiled_methods_;
   size_t num_non_null_compiled_methods_;
 };
 
@@ -395,7 +489,7 @@
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
     if (compiled_method != nullptr) {
@@ -485,7 +579,7 @@
         // Record debug information for this function if we are doing that.
         const uint32_t quick_code_start = quick_code_offset -
             writer_->oat_header_->GetExecutableOffset() - thumb_offset;
-        writer_->method_info_.push_back(DebugInfo {
+        writer_->method_info_.push_back(dwarf::MethodDebugInfo {
             dex_file_,
             class_def_index_,
             it.GetMemberIndex(),
@@ -582,7 +676,7 @@
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it ATTRIBUTE_UNUSED)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
     if (compiled_method != nullptr) {
@@ -599,7 +693,6 @@
           DataAccess::SetOffset(oat_class, method_offsets_index_, offset_);
           dedupe_map_.PutBefore(lb, map.data(), offset_);
           offset_ += map_size;
-          writer_->oat_header_->UpdateChecksum(&map[0], map_size);
         }
       }
       ++method_offsets_index_;
@@ -623,7 +716,7 @@
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
     OatMethodOffsets offsets(0u);
@@ -640,8 +733,12 @@
     StackHandleScope<1> hs(soa.Self());
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(
         Thread::Current(), *dex_file_)));
-    ArtMethod* method = linker->ResolveMethod(
-        *dex_file_, it.GetMemberIndex(), dex_cache, NullHandle<mirror::ClassLoader>(), nullptr,
+    ArtMethod* method = linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+        *dex_file_,
+        it.GetMemberIndex(),
+        dex_cache,
+        ScopedNullHandle<mirror::ClassLoader>(),
+        nullptr,
         invoke_type);
     if (method == nullptr) {
       LOG(INTERNAL_FATAL) << "Unexpected failure to resolve a method: "
@@ -710,7 +807,7 @@
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
     // No thread suspension since dex_cache_ that may get invalidated if that occurs.
@@ -747,8 +844,7 @@
             << PrettyMethod(it.GetMemberIndex(), *dex_file_);
         const OatQuickMethodHeader& method_header =
             oat_class->method_headers_[method_offsets_index_];
-        writer_->oat_header_->UpdateChecksum(&method_header, sizeof(method_header));
-        if (!out->WriteFully(&method_header, sizeof(method_header))) {
+        if (!writer_->WriteData(out, &method_header, sizeof(method_header))) {
           ReportWriteFailure("method header", it);
           return false;
         }
@@ -785,8 +881,7 @@
           }
         }
 
-        writer_->oat_header_->UpdateChecksum(quick_code.data(), code_size);
-        if (!out->WriteFully(quick_code.data(), code_size)) {
+        if (!writer_->WriteData(out, quick_code.data(), code_size)) {
           ReportWriteFailure("method code", it);
           return false;
         }
@@ -940,7 +1035,7 @@
   }
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) {
-    OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
     if (compiled_method != nullptr) {  // ie. not an abstract method
@@ -958,7 +1053,7 @@
           << map_size << " " << map_offset << " " << offset_ << " "
           << PrettyMethod(it.GetMemberIndex(), *dex_file_) << " for " << DataAccess::Name();
       if (map_size != 0u && map_offset == offset_) {
-        if (UNLIKELY(!out->WriteFully(&map[0], map_size))) {
+        if (UNLIKELY(!writer_->WriteData(out, map.data(), map_size))) {
           ReportWriteFailure(it);
           return false;
         }
@@ -1023,12 +1118,12 @@
 }
 
 size_t OatWriter::InitOatHeader() {
-  oat_header_ = OatHeader::Create(compiler_driver_->GetInstructionSet(),
-                                  compiler_driver_->GetInstructionSetFeatures(),
-                                  dex_files_,
-                                  image_file_location_oat_checksum_,
-                                  image_file_location_oat_begin_,
-                                  key_value_store_);
+  oat_header_.reset(OatHeader::Create(compiler_driver_->GetInstructionSet(),
+                                      compiler_driver_->GetInstructionSetFeatures(),
+                                      dchecked_integral_cast<uint32_t>(dex_files_->size()),
+                                      key_value_store_));
+  oat_header_->SetImageFileLocationOatChecksum(image_file_location_oat_checksum_);
+  oat_header_->SetImageFileLocationOatDataBegin(image_file_location_oat_begin_);
 
   return oat_header_->GetHeaderSize();
 }
@@ -1038,9 +1133,8 @@
   for (size_t i = 0; i != dex_files_->size(); ++i) {
     const DexFile* dex_file = (*dex_files_)[i];
     CHECK(dex_file != nullptr);
-    OatDexFile* oat_dex_file = new OatDexFile(offset, *dex_file);
-    oat_dex_files_.push_back(oat_dex_file);
-    offset += oat_dex_file->SizeOf();
+    oat_dex_files_.emplace_back(offset, *dex_file);
+    offset += oat_dex_files_.back().SizeOf();
   }
   return offset;
 }
@@ -1054,12 +1148,12 @@
     size_dex_file_alignment_ += offset - original_offset;
 
     // set offset in OatDexFile to DexFile
-    oat_dex_files_[i]->dex_file_offset_ = offset;
+    oat_dex_files_[i].dex_file_offset_ = offset;
 
     const DexFile* dex_file = (*dex_files_)[i];
 
     // Initialize type lookup table
-    oat_dex_files_[i]->lookup_table_ = dex_file->GetTypeLookupTable();
+    oat_dex_files_[i].lookup_table_ = dex_file->GetTypeLookupTable();
 
     offset += dex_file->GetHeader().file_size_;
   }
@@ -1067,14 +1161,14 @@
 }
 
 size_t OatWriter::InitLookupTables(size_t offset) {
-  for (OatDexFile* oat_dex_file : oat_dex_files_) {
-    if (oat_dex_file->lookup_table_ != nullptr) {
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    if (oat_dex_file.lookup_table_ != nullptr) {
       uint32_t aligned_offset = RoundUp(offset, 4);
-      oat_dex_file->lookup_table_offset_ = aligned_offset;
+      oat_dex_file.lookup_table_offset_ = aligned_offset;
       size_oat_lookup_table_alignment_ += aligned_offset - offset;
-      offset = aligned_offset + oat_dex_file->lookup_table_->RawDataLength();
+      offset = aligned_offset + oat_dex_file.lookup_table_->RawDataLength();
     } else {
-      oat_dex_file->lookup_table_offset_ = 0;
+      oat_dex_file.lookup_table_offset_ = 0;
     }
   }
   return offset;
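
InitLookupTables() also shows the writer's bookkeeping idiom: every alignment gap is charged to its own size counter so the DO_STAT breakdown later in this file accounts for each byte of the output. In isolation:

#include <cstddef>

// Round `offset` up to `alignment` (a power of two; 4 above) and charge the
// padding to a dedicated counter, as size_oat_lookup_table_alignment_ is.
size_t AlignAndAccount(size_t offset, size_t alignment, size_t* padding_counter) {
  size_t aligned = (offset + alignment - 1) & ~(alignment - 1);
  *padding_counter += aligned - offset;
  return aligned;
}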
@@ -1089,13 +1183,12 @@
 
   // Update oat_dex_files_.
   auto oat_class_it = oat_classes_.begin();
-  for (OatDexFile* oat_dex_file : oat_dex_files_) {
-    for (uint32_t& method_offset : oat_dex_file->methods_offsets_) {
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    for (uint32_t& class_offset : oat_dex_file.class_offsets_) {
       DCHECK(oat_class_it != oat_classes_.end());
-      method_offset = (*oat_class_it)->offset_;
+      class_offset = oat_class_it->offset_;
       ++oat_class_it;
     }
-    oat_dex_file->UpdateChecksum(oat_header_);
   }
   CHECK(oat_class_it == oat_classes_.end());
 
@@ -1179,17 +1272,14 @@
 }
 
 bool OatWriter::WriteRodata(OutputStream* out) {
-  const off_t raw_file_offset = out->Seek(0, kSeekCurrent);
-  if (raw_file_offset == (off_t) -1) {
-    LOG(ERROR) << "Failed to get file offset in " << out->GetLocation();
+  if (!GetOatDataOffset(out)) {
     return false;
   }
-  const size_t file_offset = static_cast<size_t>(raw_file_offset);
-  oat_data_offset_ = file_offset;
+  const size_t file_offset = oat_data_offset_;
 
   // Reserve space for header. It will be written last - after updating the checksum.
   size_t header_size = oat_header_->GetHeaderSize();
-  if (out->Seek(header_size, kSeekCurrent) == (off_t) -1) {
+  if (out->Seek(header_size, kSeekCurrent) == static_cast<off_t>(-1)) {
     PLOG(ERROR) << "Failed to reserve space for oat header in " << out->GetLocation();
     return false;
   }
@@ -1202,7 +1292,7 @@
   }
 
   off_t tables_end_offset = out->Seek(0, kSeekCurrent);
-  if (tables_end_offset == (off_t) -1) {
+  if (tables_end_offset == static_cast<off_t>(-1)) {
     LOG(ERROR) << "Failed to seek to oat code position in " << out->GetLocation();
     return false;
   }
@@ -1247,7 +1337,7 @@
   }
 
   const off_t oat_end_file_offset = out->Seek(0, kSeekCurrent);
-  if (oat_end_file_offset == (off_t) -1) {
+  if (oat_end_file_offset == static_cast<off_t>(-1)) {
     LOG(ERROR) << "Failed to get oat end file offset in " << out->GetLocation();
     return false;
   }
@@ -1283,14 +1373,14 @@
     DO_STAT(size_oat_dex_file_location_data_);
     DO_STAT(size_oat_dex_file_location_checksum_);
     DO_STAT(size_oat_dex_file_offset_);
-    DO_STAT(size_oat_dex_file_methods_offsets_);
+    DO_STAT(size_oat_dex_file_lookup_table_offset_);
+    DO_STAT(size_oat_dex_file_class_offsets_);
+    DO_STAT(size_oat_lookup_table_alignment_);
+    DO_STAT(size_oat_lookup_table_);
     DO_STAT(size_oat_class_type_);
     DO_STAT(size_oat_class_status_);
     DO_STAT(size_oat_class_method_bitmaps_);
     DO_STAT(size_oat_class_method_offsets_);
-    DO_STAT(size_oat_lookup_table_alignment_);
-    DO_STAT(size_oat_lookup_table_offset_);
-    DO_STAT(size_oat_lookup_table_);
     #undef DO_STAT
 
     VLOG(compiler) << "size_total=" << PrettySize(size_total) << " (" << size_total << "B)"; \
@@ -1301,17 +1391,20 @@
   CHECK_EQ(file_offset + size_, static_cast<size_t>(oat_end_file_offset));
   CHECK_EQ(size_, relative_offset);
 
+  // Finalize the header checksum.
+  oat_header_->UpdateChecksumWithHeaderData();
+
   // Write the header now that the checksum is final.
-  if (out->Seek(file_offset, kSeekSet) == (off_t) -1) {
+  if (out->Seek(file_offset, kSeekSet) == static_cast<off_t>(-1)) {
     PLOG(ERROR) << "Failed to seek to oat header position in " << out->GetLocation();
     return false;
   }
   DCHECK_EQ(file_offset, static_cast<size_t>(out->Seek(0, kSeekCurrent)));
-  if (!out->WriteFully(oat_header_, header_size)) {
+  if (!out->WriteFully(oat_header_.get(), header_size)) {
     PLOG(ERROR) << "Failed to write oat header to " << out->GetLocation();
     return false;
   }
-  if (out->Seek(oat_end_file_offset, kSeekSet) == (off_t) -1) {
+  if (out->Seek(oat_end_file_offset, kSeekSet) == static_cast<off_t>(-1)) {
     PLOG(ERROR) << "Failed to seek to end after writing oat header to " << out->GetLocation();
     return false;
   }
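The hunk above completes a reserve-then-backfill pattern: header space is skipped at the start, the payload is written while the checksum accumulates, and the stream is rewound only once the header is final. A self-contained sketch of the pattern, using std::fstream and a placeholder checksum instead of ART's OutputStream and algorithm:

    #include <cstdint>
    #include <fstream>

    struct Header {
      uint32_t magic = 0u;     // arbitrary placeholder values
      uint32_t checksum = 0u;
    };

    bool WriteWithBackfilledHeader(std::fstream& out,
                                   const char* payload,
                                   std::streamsize payload_size) {
      const std::streampos header_pos = out.tellp();
      out.seekp(sizeof(Header), std::ios::cur);       // reserve header space
      out.write(payload, payload_size);               // payload first
      Header header;
      for (std::streamsize i = 0; i < payload_size; ++i) {
        header.checksum = header.checksum * 31u +
                          static_cast<unsigned char>(payload[i]);
      }
      const std::streampos end_pos = out.tellp();
      out.seekp(header_pos);                          // rewind to the slot
      out.write(reinterpret_cast<const char*>(&header), sizeof(header));
      out.seekp(end_pos);                             // restore end position
      return static_cast<bool>(out);
    }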
@@ -1322,13 +1415,13 @@
 
 bool OatWriter::WriteTables(OutputStream* out, const size_t file_offset) {
   for (size_t i = 0; i != oat_dex_files_.size(); ++i) {
-    if (!oat_dex_files_[i]->Write(this, out, file_offset)) {
+    if (!oat_dex_files_[i].Write(this, out, file_offset)) {
       PLOG(ERROR) << "Failed to write oat dex information to " << out->GetLocation();
       return false;
     }
   }
   for (size_t i = 0; i != oat_dex_files_.size(); ++i) {
-    uint32_t expected_offset = file_offset + oat_dex_files_[i]->dex_file_offset_;
+    uint32_t expected_offset = file_offset + oat_dex_files_[i].dex_file_offset_;
     off_t actual_offset = out->Seek(expected_offset, kSeekSet);
     if (static_cast<uint32_t>(actual_offset) != expected_offset) {
       const DexFile* dex_file = (*dex_files_)[i];
@@ -1348,7 +1441,7 @@
     return false;
   }
   for (size_t i = 0; i != oat_classes_.size(); ++i) {
-    if (!oat_classes_[i]->Write(this, out, file_offset)) {
+    if (!oat_classes_[i].Write(this, out, file_offset)) {
       PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation();
       return false;
     }
@@ -1358,8 +1451,8 @@
 
 bool OatWriter::WriteLookupTables(OutputStream* out, const size_t file_offset) {
   for (size_t i = 0; i < oat_dex_files_.size(); ++i) {
-    const uint32_t lookup_table_offset = oat_dex_files_[i]->lookup_table_offset_;
-    const TypeLookupTable* table = oat_dex_files_[i]->lookup_table_;
+    const uint32_t lookup_table_offset = oat_dex_files_[i].lookup_table_offset_;
+    const TypeLookupTable* table = oat_dex_files_[i].lookup_table_;
     DCHECK_EQ(lookup_table_offset == 0, table == nullptr);
     if (lookup_table_offset == 0) {
       continue;
@@ -1373,7 +1466,7 @@
       return false;
     }
     if (table != nullptr) {
-      if (!out->WriteFully(table->RawData(), table->RawDataLength())) {
+      if (!WriteData(out, table->RawData(), table->RawDataLength())) {
         const DexFile* dex_file = (*dex_files_)[i];
         PLOG(ERROR) << "Failed to write lookup table for " << dex_file->GetLocation()
                     << " to " << out->GetLocation();
@@ -1422,7 +1515,7 @@
         uint32_t alignment_padding = aligned_offset - relative_offset; \
         out->Seek(alignment_padding, kSeekCurrent); \
         size_trampoline_alignment_ += alignment_padding; \
-        if (!out->WriteFully(&(*field)[0], field->size())) { \
+        if (!WriteData(out, field->data(), field->size())) { \
           PLOG(ERROR) << "Failed to write " # field " to " << out->GetLocation(); \
           return false; \
         } \
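The trampoline macro above pads the stream up to the next aligned offset before writing and accounts for the padding separately. The underlying arithmetic, assuming a power-of-two alignment:

    #include <cstdint>

    // Round offset up to the next multiple of a power-of-two alignment.
    uint32_t RoundUp(uint32_t offset, uint32_t alignment) {
      return (offset + alignment - 1u) & ~(alignment - 1u);
    }

    uint32_t AlignmentPadding(uint32_t offset, uint32_t alignment) {
      return RoundUp(offset, alignment) - offset;
    }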
@@ -1464,6 +1557,17 @@
   return relative_offset;
 }
 
+bool OatWriter::GetOatDataOffset(OutputStream* out) {
+  // Get the ELF file offset at which the oat data starts.
+  const off_t raw_file_offset = out->Seek(0, kSeekCurrent);
+  if (raw_file_offset == static_cast<off_t>(-1)) {
+    LOG(ERROR) << "Failed to get file offset in " << out->GetLocation();
+    return false;
+  }
+  oat_data_offset_ = static_cast<size_t>(raw_file_offset);
+  return true;
+}
+
 bool OatWriter::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) {
   static const uint8_t kPadding[] = {
       0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u
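GetOatDataOffset() above reads the current stream position with a zero-length relative seek and caches it for both WriteRodata() and later phases. The same query idiom against a plain POSIX file descriptor:

    #include <cstdio>
    #include <sys/types.h>
    #include <unistd.h>

    bool CurrentFileOffset(int fd, off_t* out_offset) {
      const off_t pos = lseek(fd, 0, SEEK_CUR);  // seeking by 0 queries position
      if (pos == static_cast<off_t>(-1)) {
        std::perror("lseek");
        return false;
      }
      *out_offset = pos;
      return true;
    }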
@@ -1476,6 +1580,11 @@
   return true;
 }
 
+bool OatWriter::WriteData(OutputStream* out, const void* data, size_t size) {
+  oat_header_->UpdateChecksum(data, size);
+  return out->WriteFully(data, size);
+}
+
 std::pair<bool, uint32_t> OatWriter::MethodOffsetMap::FindMethodOffset(MethodReference ref) {
   auto it = map.find(ref);
   if (it == map.end()) {
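WriteData() is the single chokepoint where every payload write also feeds the running checksum, replacing the hand-maintained UpdateChecksum() methods that previously had to mirror each write. A minimal stand-in with a placeholder checksum, not ART's actual algorithm:

    #include <cstddef>
    #include <cstdint>
    #include <ostream>

    class ChecksummingWriter {
     public:
      explicit ChecksummingWriter(std::ostream& out) : out_(out) {}

      // Update the checksum and write the bytes in one step, so the two
      // can never drift apart.
      bool WriteData(const void* data, size_t size) {
        const uint8_t* bytes = static_cast<const uint8_t*>(data);
        for (size_t i = 0; i < size; ++i) {
          checksum_ = checksum_ * 31u + bytes[i];  // placeholder checksum
        }
        out_.write(reinterpret_cast<const char*>(bytes),
                   static_cast<std::streamsize>(size));
        return static_cast<bool>(out_);
      }

      uint32_t checksum() const { return checksum_; }

     private:
      std::ostream& out_;
      uint32_t checksum_ = 0u;
    };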
@@ -1493,7 +1602,7 @@
   dex_file_location_checksum_ = dex_file.GetLocationChecksum();
   dex_file_offset_ = 0;
   lookup_table_offset_ = 0;
-  methods_offsets_.resize(dex_file.NumClassDefs());
+  class_offsets_.resize(dex_file.NumClassDefs());
 }
 
 size_t OatWriter::OatDexFile::SizeOf() const {
@@ -1502,63 +1611,50 @@
           + sizeof(dex_file_location_checksum_)
           + sizeof(dex_file_offset_)
           + sizeof(lookup_table_offset_)
-          + (sizeof(methods_offsets_[0]) * methods_offsets_.size());
-}
-
-void OatWriter::OatDexFile::UpdateChecksum(OatHeader* oat_header) const {
-  oat_header->UpdateChecksum(&dex_file_location_size_, sizeof(dex_file_location_size_));
-  oat_header->UpdateChecksum(dex_file_location_data_, dex_file_location_size_);
-  oat_header->UpdateChecksum(&dex_file_location_checksum_, sizeof(dex_file_location_checksum_));
-  oat_header->UpdateChecksum(&dex_file_offset_, sizeof(dex_file_offset_));
-  oat_header->UpdateChecksum(&lookup_table_offset_, sizeof(lookup_table_offset_));
-  if (lookup_table_ != nullptr) {
-    oat_header->UpdateChecksum(lookup_table_->RawData(), lookup_table_->RawDataLength());
-  }
-  oat_header->UpdateChecksum(&methods_offsets_[0],
-                            sizeof(methods_offsets_[0]) * methods_offsets_.size());
+          + (sizeof(class_offsets_[0]) * class_offsets_.size());
 }
 
 bool OatWriter::OatDexFile::Write(OatWriter* oat_writer,
                                   OutputStream* out,
                                   const size_t file_offset) const {
   DCHECK_OFFSET_();
-  if (!out->WriteFully(&dex_file_location_size_, sizeof(dex_file_location_size_))) {
+  if (!oat_writer->WriteData(out, &dex_file_location_size_, sizeof(dex_file_location_size_))) {
     PLOG(ERROR) << "Failed to write dex file location length to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_size_ += sizeof(dex_file_location_size_);
-  if (!out->WriteFully(dex_file_location_data_, dex_file_location_size_)) {
+  if (!oat_writer->WriteData(out, dex_file_location_data_, dex_file_location_size_)) {
     PLOG(ERROR) << "Failed to write dex file location data to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_data_ += dex_file_location_size_;
-  if (!out->WriteFully(&dex_file_location_checksum_, sizeof(dex_file_location_checksum_))) {
+  if (!oat_writer->WriteData(out,
+                             &dex_file_location_checksum_,
+                             sizeof(dex_file_location_checksum_))) {
     PLOG(ERROR) << "Failed to write dex file location checksum to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_checksum_ += sizeof(dex_file_location_checksum_);
-  if (!out->WriteFully(&dex_file_offset_, sizeof(dex_file_offset_))) {
+  if (!oat_writer->WriteData(out, &dex_file_offset_, sizeof(dex_file_offset_))) {
     PLOG(ERROR) << "Failed to write dex file offset to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_offset_ += sizeof(dex_file_offset_);
-  if (!out->WriteFully(&lookup_table_offset_, sizeof(lookup_table_offset_))) {
+  if (!oat_writer->WriteData(out, &lookup_table_offset_, sizeof(lookup_table_offset_))) {
     PLOG(ERROR) << "Failed to write lookup table offset to " << out->GetLocation();
     return false;
   }
-  oat_writer->size_oat_lookup_table_offset_ += sizeof(lookup_table_offset_);
-  if (!out->WriteFully(&methods_offsets_[0],
-                      sizeof(methods_offsets_[0]) * methods_offsets_.size())) {
+  oat_writer->size_oat_dex_file_lookup_table_offset_ += sizeof(lookup_table_offset_);
+  if (!oat_writer->WriteData(out, class_offsets_.data(), GetClassOffsetsRawSize())) {
     PLOG(ERROR) << "Failed to write methods offsets to " << out->GetLocation();
     return false;
   }
-  oat_writer->size_oat_dex_file_methods_offsets_ +=
-      sizeof(methods_offsets_[0]) * methods_offsets_.size();
+  oat_writer->size_oat_dex_file_class_offsets_ += GetClassOffsetsRawSize();
   return true;
 }
 
 OatWriter::OatClass::OatClass(size_t offset,
-                              const std::vector<CompiledMethod*>& compiled_methods,
+                              const dchecked_vector<CompiledMethod*>& compiled_methods,
                               uint32_t num_non_null_compiled_methods,
                               mirror::Class::Status status)
     : compiled_methods_(compiled_methods) {
@@ -1588,7 +1684,7 @@
 
   uint32_t oat_method_offsets_offset_from_oat_class = sizeof(type_) + sizeof(status_);
   if (type_ == kOatClassSomeCompiled) {
-    method_bitmap_ = new BitVector(num_methods, false, Allocator::GetMallocAllocator());
+    method_bitmap_.reset(new BitVector(num_methods, false, Allocator::GetMallocAllocator()));
     method_bitmap_size_ = method_bitmap_->GetSizeOf();
     oat_method_offsets_offset_from_oat_class += sizeof(method_bitmap_size_);
     oat_method_offsets_offset_from_oat_class += method_bitmap_size_;
@@ -1611,10 +1707,6 @@
   }
 }
 
-OatWriter::OatClass::~OatClass() {
-  delete method_bitmap_;
-}
-
 size_t OatWriter::OatClass::GetOatMethodOffsetsOffsetFromOatHeader(
     size_t class_def_method_index_) const {
   uint32_t method_offset = GetOatMethodOffsetsOffsetFromOatClass(class_def_method_index_);
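With method_bitmap_ now owned by std::unique_ptr, the destructor whose only job was delete disappears. The same simplification in miniature:

    #include <cstddef>
    #include <memory>

    struct BitmapHolder {
      // Before: BitVector* bitmap_; plus ~BitmapHolder() { delete bitmap_; }
      // After: ownership lives in the type, so no destructor is needed.
      std::unique_ptr<int[]> bitmap;

      void Allocate(std::size_t n) { bitmap.reset(new int[n]()); }
    };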
@@ -1637,51 +1729,42 @@
           + (sizeof(method_offsets_[0]) * method_offsets_.size());
 }
 
-void OatWriter::OatClass::UpdateChecksum(OatHeader* oat_header) const {
-  oat_header->UpdateChecksum(&status_, sizeof(status_));
-  oat_header->UpdateChecksum(&type_, sizeof(type_));
-  if (method_bitmap_size_ != 0) {
-    CHECK_EQ(kOatClassSomeCompiled, type_);
-    oat_header->UpdateChecksum(&method_bitmap_size_, sizeof(method_bitmap_size_));
-    oat_header->UpdateChecksum(method_bitmap_->GetRawStorage(), method_bitmap_size_);
-  }
-  oat_header->UpdateChecksum(&method_offsets_[0],
-                             sizeof(method_offsets_[0]) * method_offsets_.size());
-}
-
 bool OatWriter::OatClass::Write(OatWriter* oat_writer,
                                 OutputStream* out,
                                 const size_t file_offset) const {
   DCHECK_OFFSET_();
-  if (!out->WriteFully(&status_, sizeof(status_))) {
+  if (!oat_writer->WriteData(out, &status_, sizeof(status_))) {
     PLOG(ERROR) << "Failed to write class status to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_class_status_ += sizeof(status_);
-  if (!out->WriteFully(&type_, sizeof(type_))) {
+
+  if (!oat_writer->WriteData(out, &type_, sizeof(type_))) {
     PLOG(ERROR) << "Failed to write oat class type to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_class_type_ += sizeof(type_);
+
   if (method_bitmap_size_ != 0) {
     CHECK_EQ(kOatClassSomeCompiled, type_);
-    if (!out->WriteFully(&method_bitmap_size_, sizeof(method_bitmap_size_))) {
+    if (!oat_writer->WriteData(out, &method_bitmap_size_, sizeof(method_bitmap_size_))) {
       PLOG(ERROR) << "Failed to write method bitmap size to " << out->GetLocation();
       return false;
     }
     oat_writer->size_oat_class_method_bitmaps_ += sizeof(method_bitmap_size_);
-    if (!out->WriteFully(method_bitmap_->GetRawStorage(), method_bitmap_size_)) {
+
+    if (!oat_writer->WriteData(out, method_bitmap_->GetRawStorage(), method_bitmap_size_)) {
       PLOG(ERROR) << "Failed to write method bitmap to " << out->GetLocation();
       return false;
     }
     oat_writer->size_oat_class_method_bitmaps_ += method_bitmap_size_;
   }
-  if (!out->WriteFully(&method_offsets_[0],
-                      sizeof(method_offsets_[0]) * method_offsets_.size())) {
+
+  if (!oat_writer->WriteData(out, method_offsets_.data(), GetMethodOffsetsRawSize())) {
     PLOG(ERROR) << "Failed to write method offsets to " << out->GetLocation();
     return false;
   }
-  oat_writer->size_oat_class_method_offsets_ += sizeof(method_offsets_[0]) * method_offsets_.size();
+  oat_writer->size_oat_class_method_offsets_ += GetMethodOffsetsRawSize();
   return true;
 }
 
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 7027434..5feb5fc 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -21,12 +21,14 @@
 #include <cstddef>
 #include <memory>
 
+#include "base/dchecked_vector.h"
 #include "linker/relative_patcher.h"  // For linker::RelativePatcherTargetProvider.
 #include "mem_map.h"
 #include "method_reference.h"
 #include "mirror/class.h"
 #include "oat.h"
 #include "safe_map.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
@@ -38,6 +40,10 @@
 class TimingLogger;
 class TypeLookupTable;
 
+namespace dwarf {
+struct MethodDebugInfo;
+}  // namespace dwarf
+
 // OatHeader         variable length with count of D OatDexFiles
 //
 // OatDexFile[0]     one variable sized OatDexFile with offsets to Dex and OatClasses
@@ -120,8 +126,8 @@
     return bss_size_;
   }
 
-  const std::vector<uintptr_t>& GetAbsolutePatchLocations() const {
-    return absolute_patch_locations_;
+  ArrayRef<const uintptr_t> GetAbsolutePatchLocations() const {
+    return ArrayRef<const uintptr_t>(absolute_patch_locations_);
   }
 
   bool WriteRodata(OutputStream* out);
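Returning ArrayRef<const uintptr_t> hands callers a non-owning (pointer, length) view rather than a reference to the concrete container, so the backing vector type can change freely. A simplified stand-in for such a view:

    #include <cstddef>
    #include <vector>

    template <typename T>
    class Span {  // simplified stand-in for ART's ArrayRef
     public:
      explicit Span(const std::vector<T>& v) : data_(v.data()), size_(v.size()) {}
      const T* begin() const { return data_; }
      const T* end() const { return data_ + size_; }
      std::size_t size() const { return size_; }

     private:
      const T* data_;
      std::size_t size_;
    };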
@@ -129,20 +135,8 @@
 
   ~OatWriter();
 
-  struct DebugInfo {
-    const DexFile* dex_file_;
-    size_t class_def_index_;
-    uint32_t dex_method_index_;
-    uint32_t access_flags_;
-    const DexFile::CodeItem *code_item_;
-    bool deduped_;
-    uint32_t low_pc_;
-    uint32_t high_pc_;
-    CompiledMethod* compiled_method_;
-  };
-
-  const std::vector<DebugInfo>& GetMethodDebugInfo() const {
-    return method_info_;
+  ArrayRef<const dwarf::MethodDebugInfo> GetMethodDebugInfo() const {
+    return ArrayRef<const dwarf::MethodDebugInfo>(method_info_);
   }
 
   const CompilerDriver* GetCompilerDriver() {
@@ -150,6 +144,9 @@
   }
 
  private:
+  class OatClass;
+  class OatDexFile;
+
   // The DataAccess classes are helper classes that provide access to members related to
   // a given map, i.e. GC map, mapping table or vmap table. By abstracting these away
   // we can share a lot of code for processing the maps with template classes below.
@@ -183,10 +180,8 @@
   size_t InitDexFiles(size_t offset);
   size_t InitOatClasses(size_t offset);
   size_t InitOatMaps(size_t offset);
-  size_t InitOatCode(size_t offset)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  size_t InitOatCodeDexFiles(size_t offset)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+  size_t InitOatCode(size_t offset);
+  size_t InitOatCodeDexFiles(size_t offset);
 
   bool WriteTables(OutputStream* out, const size_t file_offset);
   bool WriteLookupTables(OutputStream* out, const size_t file_offset);
@@ -194,93 +189,11 @@
   size_t WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset);
   size_t WriteCodeDexFiles(OutputStream* out, const size_t file_offset, size_t relative_offset);
 
+  bool GetOatDataOffset(OutputStream* out);
   bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta);
+  bool WriteData(OutputStream* out, const void* data, size_t size);
 
-  class OatDexFile {
-   public:
-    OatDexFile(size_t offset, const DexFile& dex_file);
-    size_t SizeOf() const;
-    void UpdateChecksum(OatHeader* oat_header) const;
-    bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const;
-
-    // Offset of start of OatDexFile from beginning of OatHeader. It is
-    // used to validate file position when writing.
-    size_t offset_;
-
-    // data to write
-    uint32_t dex_file_location_size_;
-    const uint8_t* dex_file_location_data_;
-    uint32_t dex_file_location_checksum_;
-    uint32_t dex_file_offset_;
-    uint32_t lookup_table_offset_;
-    TypeLookupTable* lookup_table_;  // Owned by the dex file.
-    std::vector<uint32_t> methods_offsets_;
-
-   private:
-    DISALLOW_COPY_AND_ASSIGN(OatDexFile);
-  };
-
-  class OatClass {
-   public:
-    OatClass(size_t offset,
-             const std::vector<CompiledMethod*>& compiled_methods,
-             uint32_t num_non_null_compiled_methods,
-             mirror::Class::Status status);
-    ~OatClass();
-    size_t GetOatMethodOffsetsOffsetFromOatHeader(size_t class_def_method_index_) const;
-    size_t GetOatMethodOffsetsOffsetFromOatClass(size_t class_def_method_index_) const;
-    size_t SizeOf() const;
-    void UpdateChecksum(OatHeader* oat_header) const;
-    bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const;
-
-    CompiledMethod* GetCompiledMethod(size_t class_def_method_index) const {
-      DCHECK_LT(class_def_method_index, compiled_methods_.size());
-      return compiled_methods_[class_def_method_index];
-    }
-
-    // Offset of start of OatClass from beginning of OatHeader. It is
-    // used to validate file position when writing.
-    size_t offset_;
-
-    // CompiledMethods for each class_def_method_index, or null if no method is available.
-    std::vector<CompiledMethod*> compiled_methods_;
-
-    // Offset from OatClass::offset_ to the OatMethodOffsets for the
-    // class_def_method_index. If 0, it means the corresponding
-    // CompiledMethod entry in OatClass::compiled_methods_ should be
-    // null and that the OatClass::type_ should be kOatClassBitmap.
-    std::vector<uint32_t> oat_method_offsets_offsets_from_oat_class_;
-
-    // data to write
-
-    static_assert(mirror::Class::Status::kStatusMax < (2 ^ 16), "class status won't fit in 16bits");
-    int16_t status_;
-
-    static_assert(OatClassType::kOatClassMax < (2 ^ 16), "oat_class type won't fit in 16bits");
-    uint16_t type_;
-
-    uint32_t method_bitmap_size_;
-
-    // bit vector indexed by ClassDef method index. When
-    // OatClassType::type_ is kOatClassBitmap, a set bit indicates the
-    // method has an OatMethodOffsets in methods_offsets_, otherwise
-    // the entry was ommited to save space. If OatClassType::type_ is
-    // not is kOatClassBitmap, the bitmap will be null.
-    BitVector* method_bitmap_;
-
-    // OatMethodOffsets and OatMethodHeaders for each CompiledMethod
-    // present in the OatClass. Note that some may be missing if
-    // OatClass::compiled_methods_ contains null values (and
-    // oat_method_offsets_offsets_from_oat_class_ should contain 0
-    // values in this case).
-    std::vector<OatMethodOffsets> method_offsets_;
-    std::vector<OatQuickMethodHeader> method_headers_;
-
-   private:
-    DISALLOW_COPY_AND_ASSIGN(OatClass);
-  };
-
-  std::vector<DebugInfo> method_info_;
+  dchecked_vector<dwarf::MethodDebugInfo> method_info_;
 
   const CompilerDriver* const compiler_driver_;
   ImageWriter* const image_writer_;
@@ -309,9 +222,9 @@
 
   // data to write
   SafeMap<std::string, std::string>* key_value_store_;
-  OatHeader* oat_header_;
-  std::vector<OatDexFile*> oat_dex_files_;
-  std::vector<OatClass*> oat_classes_;
+  std::unique_ptr<OatHeader> oat_header_;
+  dchecked_vector<OatDexFile> oat_dex_files_;
+  dchecked_vector<OatClass> oat_classes_;
   std::unique_ptr<const std::vector<uint8_t>> jni_dlsym_lookup_;
   std::unique_ptr<const std::vector<uint8_t>> quick_generic_jni_trampoline_;
   std::unique_ptr<const std::vector<uint8_t>> quick_imt_conflict_trampoline_;
@@ -344,14 +257,14 @@
   uint32_t size_oat_dex_file_location_data_;
   uint32_t size_oat_dex_file_location_checksum_;
   uint32_t size_oat_dex_file_offset_;
-  uint32_t size_oat_dex_file_methods_offsets_;
+  uint32_t size_oat_dex_file_lookup_table_offset_;
+  uint32_t size_oat_dex_file_class_offsets_;
+  uint32_t size_oat_lookup_table_alignment_;
+  uint32_t size_oat_lookup_table_;
   uint32_t size_oat_class_type_;
   uint32_t size_oat_class_status_;
   uint32_t size_oat_class_method_bitmaps_;
   uint32_t size_oat_class_method_offsets_;
-  uint32_t size_oat_lookup_table_alignment_;
-  uint32_t size_oat_lookup_table_offset_;
-  uint32_t size_oat_lookup_table_;
 
   std::unique_ptr<linker::RelativePatcher> relative_patcher_;
 
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index f985745..f0cafc8 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -61,40 +61,6 @@
       && input_false->IsIntConstant() && input_false->AsIntConstant()->IsOne();
 }
 
-// Returns an instruction with the opposite boolean value from 'cond'.
-static HInstruction* GetOppositeCondition(HInstruction* cond) {
-  HGraph* graph = cond->GetBlock()->GetGraph();
-  ArenaAllocator* allocator = graph->GetArena();
-
-  if (cond->IsCondition()) {
-    HInstruction* lhs = cond->InputAt(0);
-    HInstruction* rhs = cond->InputAt(1);
-    switch (cond->AsCondition()->GetOppositeCondition()) {  // get *opposite*
-      case kCondEQ: return new (allocator) HEqual(lhs, rhs);
-      case kCondNE: return new (allocator) HNotEqual(lhs, rhs);
-      case kCondLT: return new (allocator) HLessThan(lhs, rhs);
-      case kCondLE: return new (allocator) HLessThanOrEqual(lhs, rhs);
-      case kCondGT: return new (allocator) HGreaterThan(lhs, rhs);
-      case kCondGE: return new (allocator) HGreaterThanOrEqual(lhs, rhs);
-      case kCondB:  return new (allocator) HBelow(lhs, rhs);
-      case kCondBE: return new (allocator) HBelowOrEqual(lhs, rhs);
-      case kCondA:  return new (allocator) HAbove(lhs, rhs);
-      case kCondAE: return new (allocator) HAboveOrEqual(lhs, rhs);
-    }
-  } else if (cond->IsIntConstant()) {
-    HIntConstant* int_const = cond->AsIntConstant();
-    if (int_const->IsZero()) {
-      return graph->GetIntConstant(1);
-    } else {
-      DCHECK(int_const->IsOne());
-      return graph->GetIntConstant(0);
-    }
-  }
-  // General case when 'cond' is another instruction of type boolean,
-  // as verified by SSAChecker.
-  return new (allocator) HBooleanNot(cond);
-}
-
 void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) {
   DCHECK(block->EndsWithIf());
 
@@ -126,10 +92,7 @@
 
   HInstruction* replacement;
   if (NegatesCondition(true_value, false_value)) {
-    replacement = GetOppositeCondition(if_condition);
-    if (replacement->GetBlock() == nullptr) {
-      block->InsertInstructionBefore(replacement, if_instruction);
-    }
+    replacement = graph_->InsertOppositeCondition(if_condition, if_instruction);
   } else if (PreservesCondition(true_value, false_value)) {
     replacement = if_condition;
   } else {
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index cca0baf..4c3f66a 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -20,6 +20,7 @@
 
 #include "base/arena_containers.h"
 #include "induction_var_range.h"
+#include "side_effects_analysis.h"
 #include "nodes.h"
 
 namespace art {
@@ -175,6 +176,24 @@
     return false;
   }
 
+  // Returns whether it is certain that this bound is greater than `bound`.
+  bool GreaterThan(ValueBound bound) const {
+    if (Equal(instruction_, bound.instruction_)) {
+      return constant_ > bound.constant_;
+    }
+    // Not comparable. Just return false.
+    return false;
+  }
+
+  // Returns whether it is certain that this bound is less than `bound`.
+  bool LessThan(ValueBound bound) const {
+    if (Equal(instruction_, bound.instruction_)) {
+      return constant_ < bound.constant_;
+    }
+    // Not comparable. Just return false.
+    return false;
+  }
+
   // Try to narrow lower bound. Returns the greatest of the two if possible.
   // Pick one if they are not comparable.
   static ValueBound NarrowLowerBound(ValueBound bound1, ValueBound bound2) {
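GreaterThan() and LessThan() above order two symbolic bounds only when they share the same base instruction; incomparable bounds conservatively answer false. In miniature:

    #include <cstdint>

    struct Bound {
      const void* base;  // stand-in for the HInstruction* component
      int32_t constant;

      // Provably greater/less only when the symbolic parts match;
      // otherwise the bounds are incomparable and we must say false.
      bool GreaterThan(const Bound& other) const {
        return base == other.base && constant > other.constant;
      }
      bool LessThan(const Bound& other) const {
        return base == other.base && constant < other.constant;
      }
    };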
@@ -252,157 +271,6 @@
   int32_t constant_;
 };
 
-// Collect array access data for a loop.
-// TODO: make it work for multiple arrays inside the loop.
-class ArrayAccessInsideLoopFinder : public ValueObject {
- public:
-  explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable)
-      : induction_variable_(induction_variable),
-        found_array_length_(nullptr),
-        offset_low_(std::numeric_limits<int32_t>::max()),
-        offset_high_(std::numeric_limits<int32_t>::min()) {
-    Run();
-  }
-
-  HArrayLength* GetFoundArrayLength() const { return found_array_length_; }
-  bool HasFoundArrayLength() const { return found_array_length_ != nullptr; }
-  int32_t GetOffsetLow() const { return offset_low_; }
-  int32_t GetOffsetHigh() const { return offset_high_; }
-
-  // Returns if `block` that is in loop_info may exit the loop, unless it's
-  // the loop header for loop_info.
-  static bool EarlyExit(HBasicBlock* block, HLoopInformation* loop_info) {
-    DCHECK(loop_info->Contains(*block));
-    if (block == loop_info->GetHeader()) {
-      // Loop header of loop_info. Exiting loop is normal.
-      return false;
-    }
-    for (HBasicBlock* successor : block->GetSuccessors()) {
-      if (!loop_info->Contains(*successor)) {
-        // One of the successors exits the loop.
-        return true;
-      }
-    }
-    return false;
-  }
-
-  static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) {
-    for (HBasicBlock* back_edge : loop_info->GetBackEdges()) {
-      if (!block->Dominates(back_edge)) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  void Run() {
-    HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation();
-    HBlocksInLoopReversePostOrderIterator it_loop(*loop_info);
-    HBasicBlock* block = it_loop.Current();
-    DCHECK(block == induction_variable_->GetBlock());
-    // Skip loop header. Since narrowed value range of a MonotonicValueRange only
-    // applies to the loop body (after the test at the end of the loop header).
-    it_loop.Advance();
-    for (; !it_loop.Done(); it_loop.Advance()) {
-      block = it_loop.Current();
-      DCHECK(block->IsInLoop());
-      if (!DominatesAllBackEdges(block, loop_info)) {
-        // In order not to trigger deoptimization unnecessarily, make sure
-        // that all array accesses collected are really executed in the loop.
-        // For array accesses in a branch inside the loop, don't collect the
-        // access. The bounds check in that branch might not be eliminated.
-        continue;
-      }
-      if (EarlyExit(block, loop_info)) {
-        // If the loop body can exit loop (like break, return, etc.), it's not guaranteed
-        // that the loop will loop through the full monotonic value range from
-        // initial_ to end_. So adding deoptimization might be too aggressive and can
-        // trigger deoptimization unnecessarily even if the loop won't actually throw
-        // AIOOBE.
-        found_array_length_ = nullptr;
-        return;
-      }
-      for (HInstruction* instruction = block->GetFirstInstruction();
-           instruction != nullptr;
-           instruction = instruction->GetNext()) {
-        if (!instruction->IsBoundsCheck()) {
-          continue;
-        }
-
-        HInstruction* length_value = instruction->InputAt(1);
-        if (length_value->IsIntConstant()) {
-          // TODO: may optimize for constant case.
-          continue;
-        }
-
-        if (length_value->IsPhi()) {
-          // When adding deoptimizations in outer loops, we might create
-          // a phi for the array length, and update all uses of the
-          // length in the loop to that phi. Therefore, inner loops having
-          // bounds checks on the same array will use that phi.
-          // TODO: handle these cases.
-          continue;
-        }
-
-        DCHECK(length_value->IsArrayLength());
-        HArrayLength* array_length = length_value->AsArrayLength();
-
-        HInstruction* array = array_length->InputAt(0);
-        if (array->IsNullCheck()) {
-          array = array->AsNullCheck()->InputAt(0);
-        }
-        if (loop_info->Contains(*array->GetBlock())) {
-          // Array is defined inside the loop. Skip.
-          continue;
-        }
-
-        if (found_array_length_ != nullptr && found_array_length_ != array_length) {
-          // There is already access for another array recorded for the loop.
-          // TODO: handle multiple arrays.
-          continue;
-        }
-
-        HInstruction* index = instruction->AsBoundsCheck()->InputAt(0);
-        HInstruction* left = index;
-        int32_t right = 0;
-        if (left == induction_variable_ ||
-            (ValueBound::IsAddOrSubAConstant(index, &left, &right) &&
-             left == induction_variable_)) {
-          // For patterns like array[i] or array[i + 2].
-          if (right < offset_low_) {
-            offset_low_ = right;
-          }
-          if (right > offset_high_) {
-            offset_high_ = right;
-          }
-        } else {
-          // Access not in induction_variable/(induction_variable_ + constant)
-          // format. Skip.
-          continue;
-        }
-        // Record this array.
-        found_array_length_ = array_length;
-      }
-    }
-  }
-
- private:
-  // The instruction that corresponds to a MonotonicValueRange.
-  HInstruction* induction_variable_;
-
-  // The array length of the array that's accessed inside the loop body.
-  HArrayLength* found_array_length_;
-
-  // The lowest and highest constant offsets relative to induction variable
-  // instruction_ in all array accesses.
-  // If array access are: array[i-1], array[i], array[i+1],
-  // offset_low_ is -1 and offset_high is 1.
-  int32_t offset_low_;
-  int32_t offset_high_;
-
-  DISALLOW_COPY_AND_ASSIGN(ArrayAccessInsideLoopFinder);
-};
-
 /**
  * Represent a range of lower bound and upper bound, both being inclusive.
  * Currently a ValueRange may be generated as a result of the following:
@@ -500,18 +368,13 @@
       : ValueRange(allocator, ValueBound::Min(), ValueBound::Max()),
         induction_variable_(induction_variable),
         initial_(initial),
-        end_(nullptr),
-        inclusive_(false),
         increment_(increment),
         bound_(bound) {}
 
   virtual ~MonotonicValueRange() {}
 
-  HInstruction* GetInductionVariable() const { return induction_variable_; }
   int32_t GetIncrement() const { return increment_; }
   ValueBound GetBound() const { return bound_; }
-  void SetEnd(HInstruction* end) { end_ = end; }
-  void SetInclusive(bool inclusive) { inclusive_ = inclusive; }
   HBasicBlock* GetLoopHeader() const {
     DCHECK(induction_variable_->GetBlock()->IsLoopHeader());
     return induction_variable_->GetBlock();
@@ -519,23 +382,6 @@
 
   MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; }
 
-  HBasicBlock* GetLoopHeaderSuccesorInLoop() {
-    HBasicBlock* header = GetLoopHeader();
-    HInstruction* instruction = header->GetLastInstruction();
-    DCHECK(instruction->IsIf());
-    HIf* h_if = instruction->AsIf();
-    HLoopInformation* loop_info = header->GetLoopInformation();
-    bool true_successor_in_loop = loop_info->Contains(*h_if->IfTrueSuccessor());
-    bool false_successor_in_loop = loop_info->Contains(*h_if->IfFalseSuccessor());
-
-    // Just in case it's some strange loop structure.
-    if (true_successor_in_loop && false_successor_in_loop) {
-      return nullptr;
-    }
-    DCHECK(true_successor_in_loop || false_successor_in_loop);
-    return false_successor_in_loop ? h_if->IfFalseSuccessor() : h_if->IfTrueSuccessor();
-  }
-
   // If it's certain that this value range fits in other_range.
   bool FitsIn(ValueRange* other_range) const OVERRIDE {
     if (other_range == nullptr) {
@@ -627,467 +473,9 @@
     }
   }
 
-  // Try to add HDeoptimize's in the loop pre-header first to narrow this range.
-  // For example, this loop:
-  //
-  //   for (int i = start; i < end; i++) {
-  //     array[i - 1] = array[i] + array[i + 1];
-  //   }
-  //
-  // will be transformed to:
-  //
-  //   int array_length_in_loop_body_if_needed;
-  //   if (start >= end) {
-  //     array_length_in_loop_body_if_needed = 0;
-  //   } else {
-  //     if (start < 1) deoptimize();
-  //     if (array == null) deoptimize();
-  //     array_length = array.length;
-  //     if (end > array_length - 1) deoptimize;
-  //     array_length_in_loop_body_if_needed = array_length;
-  //   }
-  //   for (int i = start; i < end; i++) {
-  //     // No more null check and bounds check.
-  //     // array.length value is replaced with array_length_in_loop_body_if_needed
-  //     // in the loop body.
-  //     array[i - 1] = array[i] + array[i + 1];
-  //   }
-  //
-  // We basically first go through the loop body and find those array accesses whose
-  // index is at a constant offset from the induction variable ('i' in the above example),
-  // and update offset_low and offset_high along the way. We then add the following
-  // deoptimizations in the loop pre-header (suppose end is not inclusive).
-  //   if (start < -offset_low) deoptimize();
-  //   if (end >= array.length - offset_high) deoptimize();
-  // It might be necessary to first hoist array.length (and the null check on it) out of
-  // the loop with another deoptimization.
-  //
-  // In order not to trigger deoptimization unnecessarily, we want to make a strong
-  // guarantee that no deoptimization is triggered if the loop body itself doesn't
-  // throw AIOOBE. (It's the same as saying if deoptimization is triggered, the loop
-  // body must throw AIOOBE).
-  // This is achieved by the following:
-  // 1) We only process loops that iterate through the full monotonic range from
-  //    initial_ to end_. We do the following checks to make sure that's the case:
-  //    a) The loop doesn't have early exit (via break, return, etc.)
-  //    b) The increment_ is 1/-1. An increment of 2, for example, may skip end_.
-  // 2) We only collect array accesses of blocks in the loop body that dominate
-  //    all loop back edges, these array accesses are guaranteed to happen
-  //    at each loop iteration.
-  // With 1) and 2), if the loop body doesn't throw AIOOBE, collected array accesses
-  // when the induction variable is at initial_ and end_ must be in a legal range.
-  // Since the added deoptimizations are basically checking the induction variable
-  // at initial_ and end_ values, no deoptimization will be triggered either.
-  //
-  // A special case is the loop body isn't entered at all. In that case, we may still
-  // add deoptimization due to the analysis described above. In order not to trigger
-  // deoptimization, we do a test between initial_ and end_ first and skip over
-  // the added deoptimization.
-  ValueRange* NarrowWithDeoptimization() {
-    if (increment_ != 1 && increment_ != -1) {
-      // In order not to trigger deoptimization unnecessarily, we want to
-      // make sure the loop iterates through the full range from initial_ to
-      // end_ so that boundaries are covered by the loop. An increment of 2,
-      // for example, may skip end_.
-      return this;
-    }
-
-    if (end_ == nullptr) {
-      // No full info to add deoptimization.
-      return this;
-    }
-
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    if (!initial_->GetBlock()->Dominates(pre_header) ||
-        !end_->GetBlock()->Dominates(pre_header)) {
-      // Can't add a check in loop pre-header if the value isn't available there.
-      return this;
-    }
-
-    ArrayAccessInsideLoopFinder finder(induction_variable_);
-
-    if (!finder.HasFoundArrayLength()) {
-      // No array access was found inside the loop that can benefit
-      // from deoptimization.
-      return this;
-    }
-
-    if (!AddDeoptimization(finder)) {
-      return this;
-    }
-
-    // After added deoptimizations, induction variable fits in
-    // [-offset_low, array.length-1-offset_high], adjusted with collected offsets.
-    ValueBound lower = ValueBound(0, -finder.GetOffsetLow());
-    ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh());
-    // We've narrowed the range after added deoptimizations.
-    return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper);
-  }
-
-  // Returns true if adding a (constant >= value) check for deoptimization
-  // is allowed and will benefit compiled code.
-  bool CanAddDeoptimizationConstant(HInstruction* value, int32_t constant, bool* is_proven) {
-    *is_proven = false;
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    DCHECK(value->GetBlock()->Dominates(pre_header));
-
-    // See if we can prove the relationship first.
-    if (value->IsIntConstant()) {
-      if (value->AsIntConstant()->GetValue() >= constant) {
-        // Already true.
-        *is_proven = true;
-        return true;
-      } else {
-        // May throw exception. Don't add deoptimization.
-        // Keep bounds checks in the loops.
-        return false;
-      }
-    }
-    // Can benefit from deoptimization.
-    return true;
-  }
-
-  // Try to filter out cases that the loop entry test will never be true.
-  bool LoopEntryTestUseful() {
-    if (initial_->IsIntConstant() && end_->IsIntConstant()) {
-      int32_t initial_val = initial_->AsIntConstant()->GetValue();
-      int32_t end_val = end_->AsIntConstant()->GetValue();
-      if (increment_ == 1) {
-        if (inclusive_) {
-          return initial_val > end_val;
-        } else {
-          return initial_val >= end_val;
-        }
-      } else {
-        DCHECK_EQ(increment_, -1);
-        if (inclusive_) {
-          return initial_val < end_val;
-        } else {
-          return initial_val <= end_val;
-        }
-      }
-    }
-    return true;
-  }
-
-  // Returns the block for adding deoptimization.
-  HBasicBlock* TransformLoopForDeoptimizationIfNeeded() {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    // Deoptimization is only added when both initial_ and end_ are defined
-    // before the loop.
-    DCHECK(initial_->GetBlock()->Dominates(pre_header));
-    DCHECK(end_->GetBlock()->Dominates(pre_header));
-
-    // If it can be proven the loop body is definitely entered (unless exception
-    // is thrown in the loop header for which triggering deoptimization is fine),
-    // there is no need for tranforming the loop. In that case, deoptimization
-    // will just be added in the loop pre-header.
-    if (!LoopEntryTestUseful()) {
-      return pre_header;
-    }
-
-    HGraph* graph = header->GetGraph();
-    graph->TransformLoopHeaderForBCE(header);
-    HBasicBlock* new_pre_header = header->GetDominator();
-    DCHECK(new_pre_header == header->GetLoopInformation()->GetPreHeader());
-    HBasicBlock* if_block = new_pre_header->GetDominator();
-    HBasicBlock* dummy_block = if_block->GetSuccessors()[0];  // True successor.
-    HBasicBlock* deopt_block = if_block->GetSuccessors()[1];  // False successor.
-
-    dummy_block->AddInstruction(new (graph->GetArena()) HGoto());
-    deopt_block->AddInstruction(new (graph->GetArena()) HGoto());
-    new_pre_header->AddInstruction(new (graph->GetArena()) HGoto());
-    return deopt_block;
-  }
-
-  // Adds a test between initial_ and end_ to see if the loop body is entered.
-  // If the loop body isn't entered at all, it jumps to the loop pre-header (after
-  // transformation) to avoid any deoptimization.
-  void AddLoopBodyEntryTest() {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    HBasicBlock* if_block = pre_header->GetDominator();
-    HGraph* graph = header->GetGraph();
-
-    HCondition* cond;
-    if (increment_ == 1) {
-      if (inclusive_) {
-        cond = new (graph->GetArena()) HGreaterThan(initial_, end_);
-      } else {
-        cond = new (graph->GetArena()) HGreaterThanOrEqual(initial_, end_);
-      }
-    } else {
-      DCHECK_EQ(increment_, -1);
-      if (inclusive_) {
-        cond = new (graph->GetArena()) HLessThan(initial_, end_);
-      } else {
-        cond = new (graph->GetArena()) HLessThanOrEqual(initial_, end_);
-      }
-    }
-    HIf* h_if = new (graph->GetArena()) HIf(cond);
-    if_block->AddInstruction(cond);
-    if_block->AddInstruction(h_if);
-  }
-
-  // Adds a check that (value >= constant), and HDeoptimize otherwise.
-  void AddDeoptimizationConstant(HInstruction* value,
-                                 int32_t constant,
-                                 HBasicBlock* deopt_block,
-                                 bool loop_entry_test_block_added) {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetDominator();
-    if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
-    } else {
-      DCHECK(deopt_block == pre_header);
-    }
-    HGraph* graph = header->GetGraph();
-    HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck();
-    if (loop_entry_test_block_added) {
-      DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessors()[1]);
-    }
-
-    HIntConstant* const_instr = graph->GetIntConstant(constant);
-    HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr);
-    HDeoptimize* deoptimize = new (graph->GetArena())
-        HDeoptimize(cond, suspend_check->GetDexPc());
-    deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction());
-    deopt_block->InsertInstructionBefore(deoptimize, deopt_block->GetLastInstruction());
-    deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
-        suspend_check->GetEnvironment(), header);
-  }
-
-  // Returns true if adding a (value <= array_length + offset) check for deoptimization
-  // is allowed and will benefit compiled code.
-  bool CanAddDeoptimizationArrayLength(HInstruction* value,
-                                       HArrayLength* array_length,
-                                       int32_t offset,
-                                       bool* is_proven) {
-    *is_proven = false;
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    DCHECK(value->GetBlock()->Dominates(pre_header));
-
-    if (array_length->GetBlock() == header) {
-      // array_length_in_loop_body_if_needed only has correct value when the loop
-      // body is entered. We bail out in this case. Usually array_length defined
-      // in the loop header is already hoisted by licm.
-      return false;
-    } else {
-      // array_length is defined either before the loop header already, or in
-      // the loop body since it's used in the loop body. If it's defined in the loop body,
-      // a phi array_length_in_loop_body_if_needed is used to replace it. In that case,
-      // all the uses of array_length must be dominated by its definition in the loop
-      // body. array_length_in_loop_body_if_needed is guaranteed to be the same as
-      // array_length once the loop body is entered so all the uses of the phi will
-      // use the correct value.
-    }
-
-    if (offset > 0) {
-      // There might be overflow issue.
-      // TODO: handle this, possibly with some distance relationship between
-      // offset_low and offset_high, or using another deoptimization to make
-      // sure (array_length + offset) doesn't overflow.
-      return false;
-    }
-
-    // See if we can prove the relationship first.
-    if (value == array_length) {
-      if (offset >= 0) {
-        // Already true.
-        *is_proven = true;
-        return true;
-      } else {
-        // May throw exception. Don't add deoptimization.
-        // Keep bounds checks in the loops.
-        return false;
-      }
-    }
-    // Can benefit from deoptimization.
-    return true;
-  }
-
-  // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise.
-  void AddDeoptimizationArrayLength(HInstruction* value,
-                                    HArrayLength* array_length,
-                                    int32_t offset,
-                                    HBasicBlock* deopt_block,
-                                    bool loop_entry_test_block_added) {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetDominator();
-    if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
-    } else {
-      DCHECK(deopt_block == pre_header);
-    }
-    HGraph* graph = header->GetGraph();
-    HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck();
-
-    // We may need to hoist null-check and array_length out of loop first.
-    if (!array_length->GetBlock()->Dominates(deopt_block)) {
-      // array_length must be defined in the loop body.
-      DCHECK(header->GetLoopInformation()->Contains(*array_length->GetBlock()));
-      DCHECK(array_length->GetBlock() != header);
-
-      HInstruction* array = array_length->InputAt(0);
-      HNullCheck* null_check = array->AsNullCheck();
-      if (null_check != nullptr) {
-        array = null_check->InputAt(0);
-      }
-      // We've already made sure the array is defined before the loop when collecting
-      // array accesses for the loop.
-      DCHECK(array->GetBlock()->Dominates(deopt_block));
-      if (null_check != nullptr && !null_check->GetBlock()->Dominates(deopt_block)) {
-        // Hoist null check out of loop with a deoptimization.
-        HNullConstant* null_constant = graph->GetNullConstant();
-        HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant);
-        // TODO: for one dex_pc, share the same deoptimization slow path.
-        HDeoptimize* null_check_deoptimize = new (graph->GetArena())
-            HDeoptimize(null_check_cond, suspend_check->GetDexPc());
-        deopt_block->InsertInstructionBefore(
-            null_check_cond, deopt_block->GetLastInstruction());
-        deopt_block->InsertInstructionBefore(
-            null_check_deoptimize, deopt_block->GetLastInstruction());
-        // Eliminate null check in the loop.
-        null_check->ReplaceWith(array);
-        null_check->GetBlock()->RemoveInstruction(null_check);
-        null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
-            suspend_check->GetEnvironment(), header);
-      }
-
-      HArrayLength* new_array_length
-          = new (graph->GetArena()) HArrayLength(array, array->GetDexPc());
-      deopt_block->InsertInstructionBefore(new_array_length, deopt_block->GetLastInstruction());
-
-      if (loop_entry_test_block_added) {
-        // Replace array_length defined inside the loop body with a phi
-        // array_length_in_loop_body_if_needed. This is a synthetic phi so there is
-        // no vreg number for it.
-        HPhi* phi = new (graph->GetArena()) HPhi(
-            graph->GetArena(), kNoRegNumber, 2, Primitive::kPrimInt);
-        // Set to 0 if the loop body isn't entered.
-        phi->SetRawInputAt(0, graph->GetIntConstant(0));
-        // Set to array.length if the loop body is entered.
-        phi->SetRawInputAt(1, new_array_length);
-        pre_header->AddPhi(phi);
-        array_length->ReplaceWith(phi);
-        // Make sure phi is only used after the loop body is entered.
-        if (kIsDebugBuild) {
-          for (HUseIterator<HInstruction*> it(phi->GetUses());
-               !it.Done();
-               it.Advance()) {
-            HInstruction* user = it.Current()->GetUser();
-            DCHECK(GetLoopHeaderSuccesorInLoop()->Dominates(user->GetBlock()));
-          }
-        }
-      } else {
-        array_length->ReplaceWith(new_array_length);
-      }
-
-      array_length->GetBlock()->RemoveInstruction(array_length);
-      // Use new_array_length for deopt.
-      array_length = new_array_length;
-    }
-
-    HInstruction* added = array_length;
-    if (offset != 0) {
-      HIntConstant* offset_instr = graph->GetIntConstant(offset);
-      added = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr);
-      deopt_block->InsertInstructionBefore(added, deopt_block->GetLastInstruction());
-    }
-    HCondition* cond = new (graph->GetArena()) HGreaterThan(value, added);
-    HDeoptimize* deopt = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc());
-    deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction());
-    deopt_block->InsertInstructionBefore(deopt, deopt_block->GetLastInstruction());
-    deopt->CopyEnvironmentFromWithLoopPhiAdjustment(suspend_check->GetEnvironment(), header);
-  }
-
-  // Adds deoptimizations in loop pre-header with the collected array access
-  // data so that value ranges can be established in loop body.
-  // Returns true if deoptimizations are successfully added, or if it's proven
-  // it's not necessary.
-  bool AddDeoptimization(const ArrayAccessInsideLoopFinder& finder) {
-    int32_t offset_low = finder.GetOffsetLow();
-    int32_t offset_high = finder.GetOffsetHigh();
-    HArrayLength* array_length = finder.GetFoundArrayLength();
-
-    HBasicBlock* pre_header =
-        induction_variable_->GetBlock()->GetLoopInformation()->GetPreHeader();
-    if (!initial_->GetBlock()->Dominates(pre_header) ||
-        !end_->GetBlock()->Dominates(pre_header)) {
-      // Can't move initial_ or end_ into pre_header for comparisons.
-      return false;
-    }
-
-    HBasicBlock* deopt_block;
-    bool loop_entry_test_block_added = false;
-    bool is_constant_proven, is_length_proven;
-
-    HInstruction* const_comparing_instruction;
-    int32_t const_compared_to;
-    HInstruction* array_length_comparing_instruction;
-    int32_t array_length_offset;
-    if (increment_ == 1) {
-      // Increasing from initial_ to end_.
-      const_comparing_instruction = initial_;
-      const_compared_to = -offset_low;
-      array_length_comparing_instruction = end_;
-      array_length_offset = inclusive_ ? -offset_high - 1 : -offset_high;
-    } else {
-      const_comparing_instruction = end_;
-      const_compared_to = inclusive_ ? -offset_low : -offset_low - 1;
-      array_length_comparing_instruction = initial_;
-      array_length_offset = -offset_high - 1;
-    }
-
-    if (CanAddDeoptimizationConstant(const_comparing_instruction,
-                                     const_compared_to,
-                                     &is_constant_proven) &&
-        CanAddDeoptimizationArrayLength(array_length_comparing_instruction,
-                                        array_length,
-                                        array_length_offset,
-                                        &is_length_proven)) {
-      if (!is_constant_proven || !is_length_proven) {
-        deopt_block = TransformLoopForDeoptimizationIfNeeded();
-        loop_entry_test_block_added = (deopt_block != pre_header);
-        if (loop_entry_test_block_added) {
-          // Loop body may be entered.
-          AddLoopBodyEntryTest();
-        }
-      }
-      if (!is_constant_proven) {
-        AddDeoptimizationConstant(const_comparing_instruction,
-                                  const_compared_to,
-                                  deopt_block,
-                                  loop_entry_test_block_added);
-      }
-      if (!is_length_proven) {
-        AddDeoptimizationArrayLength(array_length_comparing_instruction,
-                                     array_length,
-                                     array_length_offset,
-                                     deopt_block,
-                                     loop_entry_test_block_added);
-      }
-      return true;
-    }
-    return false;
-  }
-
  private:
   HPhi* const induction_variable_;  // Induction variable for this monotonic value range.
   HInstruction* const initial_;     // Initial value.
-  HInstruction* end_;               // End value.
-  bool inclusive_;                  // Whether end value is inclusive.
   const int32_t increment_;         // Increment for each loop iteration.
   const ValueBound bound_;          // Additional value bound info for initial_.
 
@@ -1111,7 +499,9 @@
     return block->GetBlockId() >= initial_block_size_;
   }
 
-  BCEVisitor(HGraph* graph, HInductionVarAnalysis* induction_analysis)
+  BCEVisitor(HGraph* graph,
+             const SideEffectsAnalysis& side_effects,
+             HInductionVarAnalysis* induction_analysis)
       : HGraphVisitor(graph),
         maps_(graph->GetBlocks().size(),
               ArenaSafeMap<int, ValueRange*>(
@@ -1121,8 +511,17 @@
         first_constant_index_bounds_check_map_(
             std::less<int>(),
             graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        early_exit_loop_(
+            std::less<uint32_t>(),
+            graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        taken_test_loop_(
+            std::less<uint32_t>(),
+            graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        finite_loop_(graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
         need_to_revisit_block_(false),
+        has_deoptimization_on_constant_subscripts_(false),
         initial_block_size_(graph->GetBlocks().size()),
+        side_effects_(side_effects),
         induction_range_(induction_analysis) {}
 
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
@@ -1138,6 +537,17 @@
     }
   }
 
+  void Finish() {
+    // Preserve SSA structure which may have been broken by adding one or more
+    // new taken-test structures (see TransformLoopForDeoptimizationIfNeeded()).
+    InsertPhiNodes();
+
+    // Clear the loop data structures.
+    early_exit_loop_.clear();
+    taken_test_loop_.clear();
+    finite_loop_.clear();
+  }
+
  private:
   // Return the map of proven value ranges at the beginning of a basic block.
   ArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) {
@@ -1166,25 +576,6 @@
     return nullptr;
   }
 
-  // Return the range resulting from induction variable analysis of "instruction" when the value
-  // is used from "context", for example, an index used from a bounds-check inside a loop body.
-  ValueRange* LookupInductionRange(HInstruction* context, HInstruction* instruction) {
-    InductionVarRange::Value v1;
-    InductionVarRange::Value v2;
-    bool needs_finite_test = false;
-    induction_range_.GetInductionRange(context, instruction, &v1, &v2, &needs_finite_test);
-    if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
-        v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
-      DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
-      DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
-      ValueBound low = ValueBound(v1.instruction, v1.b_constant);
-      ValueBound up = ValueBound(v2.instruction, v2.b_constant);
-      return new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), low, up);
-    }
-    // Didn't find anything useful.
-    return nullptr;
-  }
-
   // Narrow the value range of `instruction` at the end of `basic_block` with `range`,
   // and push the narrowed value range to `successor`.
   void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block,
@@ -1330,17 +721,6 @@
 
     bool overflow, underflow;
     if (cond == kCondLT || cond == kCondLE) {
-      if (left_monotonic_range != nullptr) {
-        // Update the info for monotonic value range.
-        if (left_monotonic_range->GetInductionVariable() == left &&
-            left_monotonic_range->GetIncrement() < 0 &&
-            block == left_monotonic_range->GetLoopHeader() &&
-            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
-          left_monotonic_range->SetEnd(right);
-          left_monotonic_range->SetInclusive(cond == kCondLT);
-        }
-      }
-
       if (!upper.Equals(ValueBound::Max())) {
         int32_t compensation = (cond == kCondLT) ? -1 : 0;  // upper bound is inclusive
         ValueBound new_upper = upper.Add(compensation, &overflow, &underflow);
@@ -1364,17 +744,6 @@
         ApplyRangeFromComparison(left, block, false_successor, new_range);
       }
     } else if (cond == kCondGT || cond == kCondGE) {
-      if (left_monotonic_range != nullptr) {
-        // Update the info for monotonic value range.
-        if (left_monotonic_range->GetInductionVariable() == left &&
-            left_monotonic_range->GetIncrement() > 0 &&
-            block == left_monotonic_range->GetLoopHeader() &&
-            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
-          left_monotonic_range->SetEnd(right);
-          left_monotonic_range->SetInclusive(cond == kCondGT);
-        }
-      }
-
       // array.length as a lower bound isn't considered useful.
       if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) {
         int32_t compensation = (cond == kCondGT) ? 1 : 0;  // lower bound is inclusive
@@ -1400,38 +769,34 @@
     }
   }
 
-  void VisitBoundsCheck(HBoundsCheck* bounds_check) {
+  void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE {
     HBasicBlock* block = bounds_check->GetBlock();
     HInstruction* index = bounds_check->InputAt(0);
     HInstruction* array_length = bounds_check->InputAt(1);
     DCHECK(array_length->IsIntConstant() ||
            array_length->IsArrayLength() ||
            array_length->IsPhi());
-
-    if (array_length->IsPhi()) {
-      // Input 1 of the phi contains the real array.length once the loop body is
-      // entered. That value will be used for bound analysis. The graph is still
-      // strictly in SSA form.
-      array_length = array_length->AsPhi()->InputAt(1)->AsArrayLength();
-    }
+    bool try_dynamic_bce = true;
 
     if (!index->IsIntConstant()) {
+      // Non-constant subscript.
       ValueBound lower = ValueBound(nullptr, 0);        // constant 0
       ValueBound upper = ValueBound(array_length, -1);  // array_length - 1
       ValueRange array_range(GetGraph()->GetArena(), lower, upper);
-      // Try range obtained by local analysis.
+      // Try range obtained by dominator-based analysis.
       ValueRange* index_range = LookupValueRange(index, block);
       if (index_range != nullptr && index_range->FitsIn(&array_range)) {
-        ReplaceBoundsCheck(bounds_check, index);
+        ReplaceInstruction(bounds_check, index);
         return;
       }
       // Try range obtained by induction variable analysis.
-      index_range = LookupInductionRange(bounds_check, index);
-      if (index_range != nullptr && index_range->FitsIn(&array_range)) {
-        ReplaceBoundsCheck(bounds_check, index);
+      // This also disables dynamic bce if OOB is certain.
+      if (InductionRangeFitsIn(&array_range, bounds_check, index, &try_dynamic_bce)) {
+        ReplaceInstruction(bounds_check, index);
         return;
       }
     } else {
+      // Constant subscript.
       int32_t constant = index->AsIntConstant()->GetValue();
       if (constant < 0) {
        // Will always throw an exception.
@@ -1439,7 +804,7 @@
       }
       if (array_length->IsIntConstant()) {
         if (constant < array_length->AsIntConstant()->GetValue()) {
-          ReplaceBoundsCheck(bounds_check, index);
+          ReplaceInstruction(bounds_check, index);
         }
         return;
       }
@@ -1450,7 +815,7 @@
         ValueBound lower = existing_range->GetLower();
         DCHECK(lower.IsConstant());
         if (constant < lower.GetConstant()) {
-          ReplaceBoundsCheck(bounds_check, index);
+          ReplaceInstruction(bounds_check, index);
           return;
         } else {
           // Existing range isn't strong enough to eliminate the bounds check.
@@ -1485,11 +850,11 @@
           ValueRange(GetGraph()->GetArena(), lower, upper);
       GetValueRangeMap(block)->Overwrite(array_length->GetId(), range);
     }
-  }
 
-  void ReplaceBoundsCheck(HInstruction* bounds_check, HInstruction* index) {
-    bounds_check->ReplaceWith(index);
-    bounds_check->GetBlock()->RemoveInstruction(bounds_check);
+    // If static analysis fails, and OOB is not certain, try dynamic elimination.
+    if (try_dynamic_bce) {
+      TryDynamicBCE(bounds_check);
+    }
   }
 
   static bool HasSameInputAtBackEdges(HPhi* phi) {
@@ -1508,7 +873,7 @@
     return true;
   }
 
-  void VisitPhi(HPhi* phi) {
+  void VisitPhi(HPhi* phi) OVERRIDE {
     if (phi->IsLoopHeaderPhi()
         && (phi->GetType() == Primitive::kPrimInt)
         && HasSameInputAtBackEdges(phi)) {
@@ -1555,7 +920,7 @@
     }
   }
 
-  void VisitIf(HIf* instruction) {
+  void VisitIf(HIf* instruction) OVERRIDE {
     if (instruction->InputAt(0)->IsCondition()) {
       HCondition* cond = instruction->InputAt(0)->AsCondition();
       IfCondition cmp = cond->GetCondition();
@@ -1564,42 +929,11 @@
         HInstruction* left = cond->GetLeft();
         HInstruction* right = cond->GetRight();
         HandleIf(instruction, left, right, cmp);
-
-        HBasicBlock* block = instruction->GetBlock();
-        ValueRange* left_range = LookupValueRange(left, block);
-        if (left_range == nullptr) {
-          return;
-        }
-
-        if (left_range->IsMonotonicValueRange() &&
-            block == left_range->AsMonotonicValueRange()->GetLoopHeader()) {
-          // The comparison is for an induction variable in the loop header.
-          DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable());
-          HBasicBlock* loop_body_successor =
-            left_range->AsMonotonicValueRange()->GetLoopHeaderSuccesorInLoop();
-          if (loop_body_successor == nullptr) {
-            // In case it's some strange loop structure.
-            return;
-          }
-          ValueRange* new_left_range = LookupValueRange(left, loop_body_successor);
-          if ((new_left_range == left_range) ||
-              // Range narrowed with deoptimization is usually more useful than
-              // a constant range.
-              new_left_range->IsConstantValueRange()) {
-            // We are not successful in narrowing the monotonic value range to
-            // a regular value range. Try using deoptimization.
-            new_left_range = left_range->AsMonotonicValueRange()->
-                NarrowWithDeoptimization();
-            if (new_left_range != left_range) {
-              GetValueRangeMap(loop_body_successor)->Overwrite(left->GetId(), new_left_range);
-            }
-          }
-        }
       }
     }
   }
 
-  void VisitAdd(HAdd* add) {
+  void VisitAdd(HAdd* add) OVERRIDE {
     HInstruction* right = add->GetRight();
     if (right->IsIntConstant()) {
       ValueRange* left_range = LookupValueRange(add->GetLeft(), add->GetBlock());
@@ -1613,7 +947,7 @@
     }
   }
 
-  void VisitSub(HSub* sub) {
+  void VisitSub(HSub* sub) OVERRIDE {
     HInstruction* left = sub->GetLeft();
     HInstruction* right = sub->GetRight();
     if (right->IsIntConstant()) {
@@ -1715,19 +1049,19 @@
     }
   }
 
-  void VisitDiv(HDiv* div) {
+  void VisitDiv(HDiv* div) OVERRIDE {
     FindAndHandlePartialArrayLength(div);
   }
 
-  void VisitShr(HShr* shr) {
+  void VisitShr(HShr* shr) OVERRIDE {
     FindAndHandlePartialArrayLength(shr);
   }
 
-  void VisitUShr(HUShr* ushr) {
+  void VisitUShr(HUShr* ushr) OVERRIDE {
     FindAndHandlePartialArrayLength(ushr);
   }
 
-  void VisitAnd(HAnd* instruction) {
+  void VisitAnd(HAnd* instruction) OVERRIDE {
     if (instruction->GetRight()->IsIntConstant()) {
       int32_t constant = instruction->GetRight()->AsIntConstant()->GetValue();
       if (constant > 0) {
@@ -1742,7 +1076,7 @@
     }
   }
 
-  void VisitNewArray(HNewArray* new_array) {
+  void VisitNewArray(HNewArray* new_array) OVERRIDE {
     HInstruction* len = new_array->InputAt(0);
     if (!len->IsIntConstant()) {
       HInstruction *left;
@@ -1766,9 +1100,12 @@
     }
   }
 
-  void VisitDeoptimize(HDeoptimize* deoptimize) {
-    // Right now it's only HLessThanOrEqual.
-    DCHECK(deoptimize->InputAt(0)->IsLessThanOrEqual());
+  void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE {
+    if (!deoptimize->InputAt(0)->IsLessThanOrEqual()) {
+      return;
+    }
+    // If this instruction was added by AddCompareWithDeoptimization(), narrow
+    // the range accordingly in subsequent basic blocks.
     HLessThanOrEqual* less_than_or_equal = deoptimize->InputAt(0)->AsLessThanOrEqual();
     HInstruction* instruction = less_than_or_equal->InputAt(0);
     if (instruction->IsArrayLength()) {
@@ -1782,6 +1119,35 @@
     }
   }
 
+  /**
+   * After null/bounds checks are eliminated, invariant array references may be
+   * exposed that can be hoisted out of the loop to the preheader or, in
+   * combination with dynamic bce, to the deoptimization block.
+   *
+   * for (int i = 0; i < n; i++) {
+   *                                <-------+
+   *   for (int j = 0; j < n; j++)          |
+   *     a[i][j] = 0;               --a[i]--+
+   * }
+   *
+   * Note: this optimization is no longer applied after deoptimization on array
+   * references with constant subscripts has occurred (see
+   * AddCompareWithDeoptimization()), since it would then be unsafe to hoist
+   * array references across their deoptimization instruction inside a loop.
+   */
+  void VisitArrayGet(HArrayGet* array_get) OVERRIDE {
+    if (!has_deoptimization_on_constant_subscripts_ && array_get->IsInLoop()) {
+      HLoopInformation* loop = array_get->GetBlock()->GetLoopInformation();
+      if (loop->IsDefinedOutOfTheLoop(array_get->InputAt(0)) &&
+          loop->IsDefinedOutOfTheLoop(array_get->InputAt(1))) {
+        SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader());
+        if (!array_get->GetSideEffects().MayDependOn(loop_effects)) {
+          HoistToPreheaderOrDeoptBlock(loop, array_get);
+        }
+      }
+    }
+  }
+
   void AddCompareWithDeoptimization(HInstruction* array_length,
                                     HIntConstant* const_instr,
                                     HBasicBlock* block) {
@@ -1803,6 +1169,9 @@
     block->InsertInstructionBefore(cond, bounds_check);
     block->InsertInstructionBefore(deoptimize, bounds_check);
     deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
+    // Flag that this kind of deoptimization on array references with constant
+    // subscripts has occurred to prevent further hoisting of these references.
+    has_deoptimization_on_constant_subscripts_ = true;
   }
 
   void AddComparesWithDeoptimization(HBasicBlock* block) {
@@ -1846,21 +1215,432 @@
     }
   }
 
+  /**
+   * Returns true if static range analysis based on induction variables can determine that
+   * the bounds check on the given array range is always satisfied by the computed index
+   * range. The output parameter try_dynamic_bce is set to false if OOB is certain.
+   */
+  bool InductionRangeFitsIn(ValueRange* array_range,
+                            HInstruction* context,
+                            HInstruction* index,
+                            bool* try_dynamic_bce) {
+    InductionVarRange::Value v1;
+    InductionVarRange::Value v2;
+    bool needs_finite_test = false;
+    induction_range_.GetInductionRange(context, index, &v1, &v2, &needs_finite_test);
+    do {
+      if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
+          v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
+        DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
+        DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
+        ValueRange index_range(GetGraph()->GetArena(),
+                               ValueBound(v1.instruction, v1.b_constant),
+                               ValueBound(v2.instruction, v2.b_constant));
+        // If analysis reveals a certain OOB, disable dynamic BCE.
+        if (index_range.GetLower().LessThan(array_range->GetLower()) ||
+            index_range.GetUpper().GreaterThan(array_range->GetUpper())) {
+          *try_dynamic_bce = false;
+          return false;
+        }
+        // Use analysis for static bce only if loop is finite.
+        if (!needs_finite_test && index_range.FitsIn(array_range)) {
+          return true;
+        }
+      }
+    } while (induction_range_.RefineOuter(&v1, &v2));
+    return false;
+  }
+
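A source-level sketch of the two outcomes this method distinguishes (hypothetical Java; the names are illustrative only): the first bounds check can be proven redundant statically, while the second is provably out of bounds, so static elimination fails and dynamic bce is disabled as well.

    class InductionRangeExample {
      static void fitsStatically(int[] a) {
        for (int i = 0; i < a.length; i++) {
          a[i] = 0;  // index range [0, a.length - 1] fits the array range
        }
      }
      static void certainOob(int[] a) {
        for (int i = 0; i <= a.length; i++) {
          a[i] = 0;  // upper value a.length exceeds a.length - 1: OOB is certain
        }
      }
    }
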
+  /**
+   * When the compiler fails to remove a bounds check statically, it tries to remove the
+   * bounds check dynamically by adding runtime tests that trigger a deoptimization in case
+   * the bounds would go out of range (given the slowdown of deoptimization, we want to be
+   * rather certain the tests will not fire). If no deoptimization occurs, the loop is
+   * executed with all corresponding bounds checks and related null checks removed.
+   */
+  void TryDynamicBCE(HBoundsCheck* instruction) {
+    HLoopInformation* loop = instruction->GetBlock()->GetLoopInformation();
+    HInstruction* index = instruction->InputAt(0);
+    HInstruction* length = instruction->InputAt(1);
+    // If dynamic bounds check elimination seems profitable and is possible, then proceed.
+    bool needs_finite_test = false;
+    bool needs_taken_test = false;
+    if (DynamicBCESeemsProfitable(loop, instruction->GetBlock()) &&
+        induction_range_.CanGenerateCode(
+            instruction, index, &needs_finite_test, &needs_taken_test) &&
+        CanHandleInfiniteLoop(loop, index, needs_finite_test) &&
+        CanHandleLength(loop, length, needs_taken_test)) {  // do this test last (may code gen)
+      HInstruction* lower = nullptr;
+      HInstruction* upper = nullptr;
+      // Generate the following unsigned comparisons
+      //     if (lower > upper)   deoptimize;
+      //     if (upper >= length) deoptimize;
+      // or, for a non-induction index, just the unsigned comparison on its 'upper' value
+      //     if (upper >= length) deoptimize;
+      // as runtime test. By restricting dynamic bce to unit strides (with a maximum of
+      // 32-bit iterations) and by not combining accesses (e.g. a[i], a[i-3], a[i+5], etc.),
+      // these tests correctly guard against any possible OOB (including arithmetic
+      // wrap-around cases).
+      HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+      induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper);
+      if (lower != nullptr) {
+        InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper));
+      }
+      InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(upper, length));
+      ReplaceInstruction(instruction, index);
+    }
+  }
+
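Conceptually, the transformation looks as follows at the source level (a sketch in hypothetical Java; deoptimize() stands in for the generated HDeoptimize, and the real comparisons are unsigned):

    class DynamicBceSketch {
      static void deoptimize() { throw new IllegalStateException("deopt"); }
      static int sum(int[] a, int lo, int hi) {
        // Guards hoisted before the loop; the unsigned comparisons are
        // approximated here with an explicit sign check.
        if (lo < 0 || lo > hi - 1) deoptimize();  // lower > upper (unsigned)
        if (hi - 1 >= a.length) deoptimize();     // upper >= length (unsigned)
        int s = 0;
        for (int i = lo; i < hi; i++) {
          s += a[i];  // bounds check eliminated inside the loop
        }
        return s;
      }
    }

In the real transform, a taken-test (see TransformLoopForDeoptimizationIfNeeded()) skips these guards when the loop body is never entered.
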
+  /**
+   * Returns true if heuristics indicate that dynamic bce may be profitable.
+   */
+  bool DynamicBCESeemsProfitable(HLoopInformation* loop, HBasicBlock* block) {
+    if (loop != nullptr) {
+      // A try boundary preheader is hard to handle.
+      // TODO: remove this restriction
+      if (loop->GetPreHeader()->GetLastInstruction()->IsTryBoundary()) {
+        return false;
+      }
+      // Does the loop have early exits? If so, the full range may not be covered by the
+      // loop at runtime and testing the range may trigger deoptimization unnecessarily.
+      if (IsEarlyExitLoop(loop)) {
+        return false;
+      }
+      // Does the current basic block dominate all back edges? If not,
+      // don't apply dynamic bce to something that may not be executed.
+      for (HBasicBlock* back_edge : loop->GetBackEdges()) {
+        if (!block->Dominates(back_edge)) {
+          return false;
+        }
+      }
+      // Success!
+      return true;
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the loop has early exits, which implies it may not cover
+   * the full range computed by range analysis based on induction variables.
+   */
+  bool IsEarlyExitLoop(HLoopInformation* loop) {
+    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+    // If the loop has been analyzed earlier for early-exit, don't repeat the analysis.
+    auto it = early_exit_loop_.find(loop_id);
+    if (it != early_exit_loop_.end()) {
+      return it->second;
+    }
+    // First-time early-exit analysis for this loop. Since the analysis requires scanning
+    // the full loop body, the result is stored for subsequent queries.
+    HBlocksInLoopReversePostOrderIterator it_loop(*loop);
+    for (it_loop.Advance(); !it_loop.Done(); it_loop.Advance()) {
+      for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) {
+        if (!loop->Contains(*successor)) {
+          early_exit_loop_.Put(loop_id, true);
+          return true;
+        }
+      }
+    }
+    early_exit_loop_.Put(loop_id, false);
+    return false;
+  }
+
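For example (hypothetical Java), a search loop is rejected by this analysis: it may leave the loop before reaching n - 1, so guarding the full range [0, n - 1] could deoptimize an execution that was in fact safe.

    class EarlyExitExample {
      static int find(int[] a, int n, int key) {
        for (int i = 0; i < n; i++) {
          if (a[i] == key) return i;  // early exit out of the loop
        }
        return -1;
      }
    }
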
+  /**
+   * Returns true if the array length is already loop invariant, or can be made so
+   * by handling the null check under the hood of the array length operation.
+   */
+  bool CanHandleLength(HLoopInformation* loop, HInstruction* length, bool needs_taken_test) {
+    if (loop->IsDefinedOutOfTheLoop(length)) {
+      return true;
+    } else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) {
+      if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) {
+        HoistToPreheaderOrDeoptBlock(loop, length);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the null check is already loop invariant, or can be made so
+   * by generating a deoptimization test.
+   */
+  bool CanHandleNullCheck(HLoopInformation* loop, HInstruction* check, bool needs_taken_test) {
+    if (loop->IsDefinedOutOfTheLoop(check)) {
+      return true;
+    } else if (check->IsNullCheck() && check->GetBlock()->GetLoopInformation() == loop) {
+      HInstruction* array = check->InputAt(0);
+      if (loop->IsDefinedOutOfTheLoop(array)) {
+        // Generate: if (array == null) deoptimize;
+        HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+        HInstruction* cond =
+            new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant());
+        InsertDeopt(loop, block, cond);
+        ReplaceInstruction(check, array);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the compiler can apply dynamic bce to loops that may be infinite
+   * (e.g. for (int i = 0; i <= U; i++) with U = MAX_INT), which would invalidate
+   * the range analysis evaluation code by "overshooting" the computed range.
+   * Since deoptimization would be a bad choice, and there is no other version
+   * of the loop to use, dynamic bce in such cases is only allowed if other tests
+   * ensure the loop is finite.
+   */
+  bool CanHandleInfiniteLoop(
+      HLoopInformation* loop, HInstruction* index, bool needs_infinite_test) {
+    if (needs_infinite_test) {
+      // If we already forced the loop to be finite, allow directly.
+      const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+      if (finite_loop_.find(loop_id) != finite_loop_.end()) {
+        return true;
+      }
+      // Otherwise, allow dynamic bce if the index (which is necessarily an induction at
+      // this point) is the direct loop index (viz. a[i]), since then the runtime tests
+      // ensure the upper bound cannot cause an infinite loop.
+      HInstruction* control = loop->GetHeader()->GetLastInstruction();
+      if (control->IsIf()) {
+        HInstruction* if_expr = control->AsIf()->InputAt(0);
+        if (if_expr->IsCondition()) {
+          HCondition* condition = if_expr->AsCondition();
+          if (index == condition->InputAt(0) ||
+              index == condition->InputAt(1)) {
+            finite_loop_.insert(loop_id);
+            return true;
+          }
+        }
+      }
+      return false;
+    }
+    return true;
+  }
+
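A sketch of the allowed case (hypothetical Java): the loop is potentially infinite, but the index is compared directly in the loop condition, so the generated range test also bounds the trip count.

    class PotentiallyInfiniteExample {
      static void fill(int[] a, int u) {
        // If u == Integer.MAX_VALUE, i <= u never becomes false (i wraps around).
        // Dynamic bce remains legal here because the condition u < a.length,
        // implied by the deoptimization test, also forces the loop to terminate.
        for (int i = 0; i <= u; i++) {
          a[i] = 0;
        }
      }
    }
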
+  /** Inserts a deoptimization test. */
+  void InsertDeopt(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
+    HInstruction* suspend = loop->GetSuspendCheck();
+    block->InsertInstructionBefore(condition, block->GetLastInstruction());
+    HDeoptimize* deoptimize =
+        new (GetGraph()->GetArena()) HDeoptimize(condition, suspend->GetDexPc());
+    block->InsertInstructionBefore(deoptimize, block->GetLastInstruction());
+    if (suspend->HasEnvironment()) {
+      deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+          suspend->GetEnvironment(), loop->GetHeader());
+    }
+  }
+
+  /** Hoists instruction out of the loop to preheader or deoptimization block. */
+  void HoistToPreheaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) {
+    // Use the preheader unless there is an earlier generated deoptimization block, since
+    // hoisted expressions may depend on and/or be used by the deoptimization tests.
+    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+    HBasicBlock* preheader = loop->GetPreHeader();
+    HBasicBlock* block = preheader;
+    auto it = taken_test_loop_.find(loop_id);
+    if (it != taken_test_loop_.end()) {
+      block = it->second;
+    }
+    // Hoist the instruction.
+    DCHECK(!instruction->HasEnvironment());
+    instruction->MoveBefore(block->GetLastInstruction());
+  }
+
+  /**
+   * Adds a new taken-test structure to a loop if needed (and not already done).
+   * The taken-test protects the range analysis evaluation code against any
+   * deoptimization caused by incorrect trip-count evaluation in non-taken loops.
+   *
+   * Returns block in which deoptimizations/invariants can be put.
+   *
+   *          old_preheader
+   *               |
+   *            if_block          <- taken-test protects deoptimization block
+   *            /      \
+   *     true_block  false_block  <- deoptimizations/invariants are placed in true_block
+   *            \       /
+   *          new_preheader       <- may require phi nodes to preserve SSA structure
+   *                |
+   *             header
+   *
+   * For example, this loop:
+   *
+   *   for (int i = lower; i < upper; i++) {
+   *     array[i] = 0;
+   *   }
+   *
+   * will be transformed to:
+   *
+   *   if (lower < upper) {
+   *     if (array == null) deoptimize;
+   *     array_length = array.length;
+   *     if (lower > upper)         deoptimize;  // unsigned
+   *     if (upper >= array_length) deoptimize;  // unsigned
+   *   } else {
+   *     array_length = 0;
+   *   }
+   *   for (int i = lower; i < upper; i++) {
+   *     // Loop without null check and bounds check, and any array.length replaced with array_length.
+   *     array[i] = 0;
+   *   }
+   */
+  HBasicBlock* TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) {
+    // Not needed (can use preheader), or already done (can reuse)?
+    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+    if (!needs_taken_test) {
+      return loop->GetPreHeader();
+    } else {
+      auto it = taken_test_loop_.find(loop_id);
+      if (it != taken_test_loop_.end()) {
+        return it->second;
+      }
+    }
+
+    // Generate top test structure.
+    HBasicBlock* header = loop->GetHeader();
+    GetGraph()->TransformLoopHeaderForBCE(header);
+    HBasicBlock* new_preheader = loop->GetPreHeader();
+    HBasicBlock* if_block = new_preheader->GetDominator();
+    HBasicBlock* true_block = if_block->GetSuccessors()[0];  // True successor.
+    HBasicBlock* false_block = if_block->GetSuccessors()[1];  // False successor.
+
+    // Goto instructions.
+    true_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+    false_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+    new_preheader->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+
+    // Insert the taken-test to see if the loop body is entered. If the
+    // loop isn't entered at all, it jumps around the deoptimization block.
+    if_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());  // placeholder
+    HInstruction* condition = nullptr;
+    induction_range_.GenerateTakenTest(header->GetLastInstruction(),
+                                       GetGraph(),
+                                       if_block,
+                                       &condition);
+    DCHECK(condition != nullptr);
+    if_block->RemoveInstruction(if_block->GetLastInstruction());
+    if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition));
+
+    taken_test_loop_.Put(loop_id, true_block);
+    return true_block;
+  }
+
+  /**
+   * Inserts phi nodes that preserve SSA structure in generated top test structures.
+   * All uses of instructions in the deoptimization block that reach the loop need
+   * a phi node in the new loop preheader to fix the dominance relation.
+   *
+   * Example:
+   *           if_block
+   *            /      \
+   *         x_0 = ..  false_block
+   *            \       /
+   *           x_1 = phi(x_0, null)   <- synthetic phi
+   *               |
+   *             header
+   */
+  void InsertPhiNodes() {
+    // Scan all new deoptimization blocks.
+    for (auto it1 = taken_test_loop_.begin(); it1 != taken_test_loop_.end(); ++it1) {
+      HBasicBlock* true_block = it1->second;
+      HBasicBlock* new_preheader = true_block->GetSingleSuccessor();
+      // Scan all instructions in a new deoptimization block.
+      for (HInstructionIterator it(true_block->GetInstructions()); !it.Done(); it.Advance()) {
+        HInstruction* instruction = it.Current();
+        Primitive::Type type = instruction->GetType();
+        HPhi* phi = nullptr;
+        // Scan all uses of an instruction and replace each use outside the
+        // deoptimization block with a phi node.
+        for (HUseIterator<HInstruction*> it2(instruction->GetUses());
+             !it2.Done();
+             it2.Advance()) {
+          HInstruction* user = it2.Current()->GetUser();
+          if (user->GetBlock() != true_block) {
+            if (phi == nullptr) {
+              phi = NewPhi(new_preheader, instruction, type);
+            }
+            user->ReplaceInput(phi, it2.Current()->GetIndex());
+          }
+        }
+        // Scan all environment uses of an instruction and replace each use
+        // outside the deoptimization block with a phi node.
+        for (HUseIterator<HEnvironment*> it2(instruction->GetEnvUses());
+             !it2.Done();
+             it2.Advance()) {
+          HEnvironment* user = it2.Current()->GetUser();
+          if (user->GetHolder()->GetBlock() != true_block) {
+            if (phi == nullptr) {
+              phi = NewPhi(new_preheader, instruction, type);
+            }
+            user->RemoveAsUserOfInput(it2.Current()->GetIndex());
+            user->SetRawEnvAt(it2.Current()->GetIndex(), phi);
+            phi->AddEnvUseAt(user, it2.Current()->GetIndex());
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Construct a phi(instruction, 0) in the new preheader to fix the dominance relation.
+   * These are synthetic phi nodes without a virtual register.
+   */
+  HPhi* NewPhi(HBasicBlock* new_preheader,
+               HInstruction* instruction,
+               Primitive::Type type) {
+    HGraph* graph = GetGraph();
+    HInstruction* zero;
+    switch (type) {
+      case Primitive::Type::kPrimNot: zero = graph->GetNullConstant(); break;
+      case Primitive::Type::kPrimFloat: zero = graph->GetFloatConstant(0); break;
+      case Primitive::Type::kPrimDouble: zero = graph->GetDoubleConstant(0); break;
+      default: zero = graph->GetConstant(type, 0); break;
+    }
+    HPhi* phi = new (graph->GetArena())
+        HPhi(graph->GetArena(), kNoRegNumber, /*number_of_inputs*/ 2, HPhi::ToPhiType(type));
+    phi->SetRawInputAt(0, instruction);
+    phi->SetRawInputAt(1, zero);
+    new_preheader->AddPhi(phi);
+    return phi;
+  }
+
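In source terms (a hypothetical Java sketch of the IR situation), values defined in the deoptimization block exist only on the taken path, and the synthetic phi merges them with a type-appropriate zero:

    class PhiSketch {
      static void sketch(int[] array, int lower, int upper) {
        int arrayLength;              // defined in the deoptimization block
        if (lower < upper) {
          arrayLength = array.length;
        } else {
          arrayLength = 0;            // the phi's second input: zero of the type
        }
        // Uses below the merge read phi(array.length, 0), restoring dominance.
        for (int i = lower; i < upper; i++) {
          array[i] = arrayLength;
        }
      }
    }
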
+  /** Helper method to replace an instruction with another instruction. */
+  static void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) {
+    instruction->ReplaceWith(replacement);
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  }
+
+  // A set of maps, one per basic block, from instruction to range.
   ArenaVector<ArenaSafeMap<int, ValueRange*>> maps_;
 
   // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in
   // a block that checks a constant index against that HArrayLength.
   ArenaSafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_;
 
+  // Early-exit loop bookkeeping.
+  ArenaSafeMap<uint32_t, bool> early_exit_loop_;
+
+  // Taken-test loop bookkeeping.
+  ArenaSafeMap<uint32_t, HBasicBlock*> taken_test_loop_;
+
+  // Finite loop bookkeeping.
+  ArenaSet<uint32_t> finite_loop_;
+
   // For the block, there is at least one HArrayLength instruction for which there
   // is more than one bounds check instruction with constant indexing. And it's
   // beneficial to add a compare instruction that has deoptimization fallback and
   // eliminate those bounds checks.
   bool need_to_revisit_block_;
 
+  // Flag that denotes whether deoptimization has occurred on array references
+  // with constant subscripts (see AddCompareWithDeoptimization()).
+  bool has_deoptimization_on_constant_subscripts_;
+
   // Initial number of blocks.
   uint32_t initial_block_size_;
 
+  // Side effects.
+  const SideEffectsAnalysis& side_effects_;
+
   // Range analysis based on induction variables.
   InductionVarRange induction_range_;
 
@@ -1872,14 +1652,12 @@
     return;
   }
 
-  BCEVisitor visitor(graph_, induction_analysis_);
   // Reverse post order guarantees a node's dominators are visited first.
   // We want to visit in the dominator-based order since if a value is known to
   // be bounded by a range at one instruction, it must be true that all uses of
 // that value dominated by that instruction fit in that range. The range of that
 // value can be narrowed further down in the dominator tree.
-  //
-  // TODO: only visit blocks that dominate some array accesses.
+  BCEVisitor visitor(graph_, side_effects_, induction_analysis_);
   HBasicBlock* last_visited_block = nullptr;
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* current = it.Current();
@@ -1896,6 +1674,9 @@
     visitor.VisitBasicBlock(current);
     last_visited_block = current;
   }
+
+  // Perform cleanup.
+  visitor.Finish();
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h
index cdff3ca..b9df686 100644
--- a/compiler/optimizing/bounds_check_elimination.h
+++ b/compiler/optimizing/bounds_check_elimination.h
@@ -21,12 +21,16 @@
 
 namespace art {
 
+class SideEffectsAnalysis;
 class HInductionVarAnalysis;
 
 class BoundsCheckElimination : public HOptimization {
  public:
-  BoundsCheckElimination(HGraph* graph, HInductionVarAnalysis* induction_analysis)
+  BoundsCheckElimination(HGraph* graph,
+                         const SideEffectsAnalysis& side_effects,
+                         HInductionVarAnalysis* induction_analysis)
       : HOptimization(graph, kBoundsCheckEliminiationPassName),
+        side_effects_(side_effects),
         induction_analysis_(induction_analysis) {}
 
   void Run() OVERRIDE;
@@ -34,6 +38,7 @@
   static constexpr const char* kBoundsCheckEliminiationPassName = "BCE";
 
  private:
+  const SideEffectsAnalysis& side_effects_;
   HInductionVarAnalysis* induction_analysis_;
 
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckElimination);
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index c9afdf2..dbeb1cc 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -54,7 +54,7 @@
     HInductionVarAnalysis induction(graph_);
     induction.Run();
 
-    BoundsCheckElimination(graph_, &induction).Run();
+    BoundsCheckElimination(graph_, side_effects, &induction).Run();
   }
 
   ArenaPool pool_;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 3257de1..4dd0d26 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -17,6 +17,8 @@
 #include "builder.h"
 
 #include "art_field-inl.h"
+#include "base/arena_bit_vector.h"
+#include "base/bit_vector-inl.h"
 #include "base/logging.h"
 #include "class_linker.h"
 #include "dex/verified_method.h"
@@ -458,6 +460,19 @@
     return false;
   }
 
+  // Find locations where we want to generate extra stackmaps for native debugging.
+  // This allows us to generate the info only at interesting points (for example,
+  // at the start of a Java statement) rather than before every dex instruction.
+  const bool native_debuggable = compiler_driver_ != nullptr &&
+                                 compiler_driver_->GetCompilerOptions().GetNativeDebuggable();
+  ArenaBitVector* native_debug_info_locations;
+  if (native_debuggable) {
+    const uint32_t num_instructions = code_item.insns_size_in_code_units_;
+    native_debug_info_locations = new (arena_) ArenaBitVector(arena_, num_instructions, false);
+    native_debug_info_locations->ClearAllBits();
+    FindNativeDebugInfoLocations(code_item, native_debug_info_locations);
+  }
+
   CreateBlocksForTryCatch(code_item);
 
   InitializeParameters(code_item.ins_size_);
@@ -467,6 +482,11 @@
     // Update the current block if dex_pc starts a new block.
     MaybeUpdateCurrentBlock(dex_pc);
     const Instruction& instruction = *Instruction::At(code_ptr);
+    if (native_debuggable && native_debug_info_locations->IsBitSet(dex_pc)) {
+      if (current_block_ != nullptr) {
+        current_block_->AddInstruction(new (arena_) HNativeDebugInfo(dex_pc));
+      }
+    }
     if (!AnalyzeDexInstruction(instruction, dex_pc)) {
       return false;
     }
@@ -507,6 +527,47 @@
   current_block_ = block;
 }
 
+void HGraphBuilder::FindNativeDebugInfoLocations(const DexFile::CodeItem& code_item,
+                                                 ArenaBitVector* locations) {
+  // The callback gets called when the line number changes.
+  // In other words, it marks the start of a new Java statement.
+  struct Callback {
+    static bool Position(void* ctx, const DexFile::PositionInfo& entry) {
+      static_cast<ArenaBitVector*>(ctx)->SetBit(entry.address_);
+      return false;
+    }
+  };
+  dex_file_->DecodeDebugPositionInfo(&code_item, Callback::Position, locations);
+  // Add native debug info at the start of every basic block.
+  for (uint32_t pc = 0; pc < code_item.insns_size_in_code_units_; pc++) {
+    if (FindBlockStartingAt(pc) != nullptr) {
+      locations->SetBit(pc);
+    }
+  }
+  // Instruction-specific tweaks.
+  const Instruction* const begin = Instruction::At(code_item.insns_);
+  const Instruction* const end = begin->RelativeAt(code_item.insns_size_in_code_units_);
+  for (const Instruction* inst = begin; inst < end; inst = inst->Next()) {
+    switch (inst->Opcode()) {
+      case Instruction::MOVE_EXCEPTION:
+      case Instruction::MOVE_RESULT:
+      case Instruction::MOVE_RESULT_WIDE:
+      case Instruction::MOVE_RESULT_OBJECT: {
+        // The compiler checks that there are no instructions before those,
+        // so generate HNativeDebugInfo after them instead.
+        locations->ClearBit(inst->GetDexPc(code_item.insns_));
+        const Instruction* next = inst->Next();
+        if (next < end) {
+          locations->SetBit(next->GetDexPc(code_item.insns_));
+        }
+        break;
+      }
+      default:
+        break;
+    }
+  }
+}
+
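A rough Java-level picture of where the extra stackmaps land (hypothetical example, assuming ordinary line-number debug info; f and g are illustrative names):

    class DebugInfoExample {
      static int f(int x) {
        int y = x + 1;   // new line: native debug info location
        if (y > 0) {     // new line and block start: also a location
          y = g(y);      // the location is moved past move-result, so the
        }                // call's result is visible at the stackmap
        return y;        // new line: location
      }
      static int g(int v) { return v * 2; }
    }
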
 bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
                                          const uint16_t* code_end,
                                          size_t* number_of_branches) {
@@ -735,6 +796,79 @@
   }
 }
 
+ArtMethod* HGraphBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<2> hs(soa.Self());
+
+  ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+  Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
+
+  ArtMethod* resolved_method = class_linker->ResolveMethod<ClassLinker::kForceICCECheck>(
+      *dex_compilation_unit_->GetDexFile(),
+      method_idx,
+      dex_compilation_unit_->GetDexCache(),
+      class_loader,
+      /* referrer */ nullptr,
+      invoke_type);
+
+  if (UNLIKELY(resolved_method == nullptr)) {
+    // Clean up any exception left by type resolution.
+    soa.Self()->ClearException();
+    return nullptr;
+  }
+
+  // Check access. The class linker has a fast path for looking into the dex cache
+  // and does not check the access if it hits it.
+  if (compiling_class.Get() == nullptr) {
+    if (!resolved_method->IsPublic()) {
+      return nullptr;
+    }
+  } else if (!compiling_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(),
+                                                       resolved_method,
+                                                       dex_compilation_unit_->GetDexCache().Get(),
+                                                       method_idx)) {
+    return nullptr;
+  }
+
+  // We have to special case the invoke-super case, as ClassLinker::ResolveMethod does not.
+  // We need to look at the referrer's super class vtable.
+  if (invoke_type == kSuper) {
+    if (compiling_class.Get() == nullptr) {
+      // Invoking a super method requires knowing the actual super class. If we did not resolve
+      // the compiling method's declaring class (which only happens for ahead of time compilation),
+      // bail out.
+      DCHECK(Runtime::Current()->IsAotCompiler());
+      return nullptr;
+    }
+    uint16_t vtable_index = resolved_method->GetMethodIndex();
+    ArtMethod* actual_method = compiling_class->GetSuperClass()->GetVTableEntry(
+        vtable_index, class_linker->GetImagePointerSize());
+    if (actual_method != resolved_method &&
+        !IsSameDexFile(*actual_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+      // TODO: The actual method could still be referenced in the current dex file, so we
+      // could try locating it.
+      // TODO: Remove the dex_file restriction.
+      return nullptr;
+    }
+    if (!actual_method->IsInvokable()) {
+      // Fail if the actual method cannot be invoked. Otherwise, the runtime resolution stub
+      // could resolve the callee to the wrong method.
+      return nullptr;
+    }
+    resolved_method = actual_method;
+  }
+
+  // Check for incompatible class changes. The class linker has a fast path for
+  // looking into the dex cache and does not check incompatible class changes if it hits it.
+  if (resolved_method->CheckIncompatibleClassChange(invoke_type)) {
+    return nullptr;
+  }
+
+  return resolved_method;
+}
+
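For instance (hypothetical Java), an invoke-super resolves through the vtable of the compiling class's superclass:

    class SuperResolveExample {
      static class A { int m() { return 1; } }
      static class B extends A {
        @Override int m() {
          // invoke-super: the resolved method's vtable index is looked up in
          // B's superclass (A), yielding A.m() as the actual target.
          return super.m() + 1;
        }
      }
    }
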
 bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
                                 uint32_t dex_pc,
                                 uint32_t method_idx,
@@ -742,22 +876,18 @@
                                 bool is_range,
                                 uint32_t* args,
                                 uint32_t register_index) {
-  InvokeType original_invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
-  InvokeType optimized_invoke_type = original_invoke_type;
+  InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
   const char* descriptor = dex_file_->GetMethodShorty(method_idx);
   Primitive::Type return_type = Primitive::GetType(descriptor[0]);
 
   // Remove the return type from the 'proto'.
   size_t number_of_arguments = strlen(descriptor) - 1;
-  if (original_invoke_type != kStatic) {  // instance call
+  if (invoke_type != kStatic) {  // instance call
     // One extra argument for 'this'.
     number_of_arguments++;
   }
 
   MethodReference target_method(dex_file_, method_idx);
-  int32_t table_index = 0;
-  uintptr_t direct_code = 0;
-  uintptr_t direct_method = 0;
 
   // Special handling for string init.
   int32_t string_init_offset = 0;
@@ -780,7 +910,7 @@
         method_idx,
         target_method,
         dispatch_info,
-        original_invoke_type,
+        invoke_type,
         kStatic /* optimized_invoke_type */,
         HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
     return HandleStringInit(invoke,
@@ -791,23 +921,16 @@
                             descriptor);
   }
 
-  // Handle unresolved methods.
-  if (!compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_,
-                                           dex_pc,
-                                           true /* update_stats */,
-                                           true /* enable_devirtualization */,
-                                           &optimized_invoke_type,
-                                           &target_method,
-                                           &table_index,
-                                           &direct_code,
-                                           &direct_method)) {
+  ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
+
+  if (resolved_method == nullptr) {
     MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
     HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
                                                      number_of_arguments,
                                                      return_type,
                                                      dex_pc,
                                                      method_idx,
-                                                     original_invoke_type);
+                                                     invoke_type);
     return HandleInvoke(invoke,
                         number_of_vreg_arguments,
                         args,
@@ -817,21 +940,26 @@
                         nullptr /* clinit_check */);
   }
 
-  // Handle resolved methods (non string init).
-
-  DCHECK(optimized_invoke_type != kSuper);
-
   // Potential class initialization check, in the case of a static method call.
   HClinitCheck* clinit_check = nullptr;
   HInvoke* invoke = nullptr;
 
-  if (optimized_invoke_type == kDirect || optimized_invoke_type == kStatic) {
+  if (invoke_type == kDirect || invoke_type == kStatic || invoke_type == kSuper) {
     // By default, consider that the called method implicitly requires
     // an initialization check of its declaring method.
     HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement
         = HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
-    if (optimized_invoke_type == kStatic) {
-      clinit_check = ProcessClinitCheckForInvoke(dex_pc, method_idx, &clinit_check_requirement);
+    ScopedObjectAccess soa(Thread::Current());
+    if (invoke_type == kStatic) {
+      clinit_check = ProcessClinitCheckForInvoke(
+          dex_pc, resolved_method, method_idx, &clinit_check_requirement);
+    } else if (invoke_type == kSuper) {
+      if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+        // Update the target method to the one resolved. Note that this may be a no-op if
+        // we resolved to the method referenced by the instruction.
+        method_idx = resolved_method->GetDexMethodIndex();
+        target_method = MethodReference(dex_file_, method_idx);
+      }
     }
 
     HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
@@ -847,24 +975,26 @@
                                                 method_idx,
                                                 target_method,
                                                 dispatch_info,
-                                                original_invoke_type,
-                                                optimized_invoke_type,
+                                                invoke_type,
+                                                invoke_type,
                                                 clinit_check_requirement);
-  } else if (optimized_invoke_type == kVirtual) {
+  } else if (invoke_type == kVirtual) {
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
     invoke = new (arena_) HInvokeVirtual(arena_,
                                          number_of_arguments,
                                          return_type,
                                          dex_pc,
                                          method_idx,
-                                         table_index);
+                                         resolved_method->GetMethodIndex());
   } else {
-    DCHECK_EQ(optimized_invoke_type, kInterface);
+    DCHECK_EQ(invoke_type, kInterface);
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
     invoke = new (arena_) HInvokeInterface(arena_,
                                            number_of_arguments,
                                            return_type,
                                            dex_pc,
                                            method_idx,
-                                           table_index);
+                                           resolved_method->GetDexMethodIndex());
   }
 
   return HandleInvoke(invoke,
@@ -876,26 +1006,106 @@
                       clinit_check);
 }
 
-HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke(
-      uint32_t dex_pc,
-      uint32_t method_idx,
-      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
+bool HGraphBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) {
+  bool finalizable;
+  bool can_throw = NeedsAccessCheck(type_index, &finalizable);
+
+  // Only the non-resolved entrypoint handles the finalizable class case. If we
+  // need access checks, then we have not resolved the class, which may again
+  // be finalizable.
+  QuickEntrypointEnum entrypoint = (finalizable || can_throw)
+      ? kQuickAllocObject
+      : kQuickAllocObjectInitialized;
+
   ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<4> hs(soa.Self());
+  StackHandleScope<3> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
       dex_compilation_unit_->GetClassLinker()->FindDexCache(
           soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  ArtMethod* resolved_method = compiler_driver_->ResolveMethod(
-      soa, dex_cache, class_loader, dex_compilation_unit_, method_idx, InvokeType::kStatic);
-
-  DCHECK(resolved_method != nullptr);
-
+  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
   const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
   Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
       outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
+
+  if (outer_dex_cache.Get() != dex_cache.Get()) {
+    // We currently do not support inlining allocations across dex files.
+    return false;
+  }
+
+  HLoadClass* load_class = new (arena_) HLoadClass(
+      graph_->GetCurrentMethod(),
+      type_index,
+      outer_dex_file,
+      IsOutermostCompilingClass(type_index),
+      dex_pc,
+      /*needs_access_check*/ can_throw,
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, type_index));
+
+  current_block_->AddInstruction(load_class);
+  HInstruction* cls = load_class;
+  if (!IsInitialized(resolved_class)) {
+    cls = new (arena_) HClinitCheck(load_class, dex_pc);
+    current_block_->AddInstruction(cls);
+  }
+
+  current_block_->AddInstruction(new (arena_) HNewInstance(
+      cls,
+      graph_->GetCurrentMethod(),
+      dex_pc,
+      type_index,
+      *dex_compilation_unit_->GetDexFile(),
+      can_throw,
+      finalizable,
+      entrypoint));
+  return true;
+}
+
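At the source level (hypothetical Java classes for illustration), the entrypoint choice splits roughly as follows:

    class AllocEntrypointExample {
      static class Plain {}  // resolvable, not finalizable: initialized entrypoint
      static class Finalized {
        @Override protected void finalize() {}  // finalizable: generic entrypoint
      }
      static Object[] make() {
        return new Object[] { new Plain(), new Finalized() };
      }
    }
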
+static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class);
+}
+
+bool HGraphBuilder::IsInitialized(Handle<mirror::Class> cls) const {
+  if (cls.Get() == nullptr) {
+    return false;
+  }
+
+  // `CanAssumeClassIsLoaded` will return true if we're JITting, or will
+  // check whether the class is in an image for AOT compilation.
+  if (cls->IsInitialized() &&
+      compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) {
+    return true;
+  }
+
+  if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) {
+    return true;
+  }
+
+  // TODO: We should walk over the inlined methods, but we don't pass
+  //       that information to the builder.
+  if (IsSubClass(GetCompilingClass(), cls.Get())) {
+    return true;
+  }
+
+  return false;
+}
+
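A sketch of the subclass case (hypothetical Java): when the compiling method belongs to the declaring class itself (or one of its subclasses), the class initializer must already have run, so no HClinitCheck is emitted.

    class InitializedExample {
      static int x;
      static void set() {
        x = 1;  // compiling InitializedExample.set(): no clinit check needed
      }
    }
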
+HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke(
+      uint32_t dex_pc,
+      ArtMethod* resolved_method,
+      uint32_t method_idx,
+      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Thread* self = Thread::Current();
+  StackHandleScope<4> hs(self);
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(
+          self, *dex_compilation_unit_->GetDexFile())));
+  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
+      outer_compilation_unit_->GetClassLinker()->FindDexCache(
+          self, outer_dex_file)));
   Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
+  Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass()));
 
   // The index at which the method's class is stored in the DexCache's type array.
   uint32_t storage_index = DexFile::kDexNoIndex;
@@ -913,41 +1123,21 @@
 
   HClinitCheck* clinit_check = nullptr;
 
-  if (!outer_class->IsInterface()
-      && outer_class->IsSubClass(resolved_method->GetDeclaringClass())) {
-    // If the outer class is the declaring class or a subclass
-    // of the declaring class, no class initialization is needed
-    // before the static method call.
-    // Note that in case of inlining, we do not need to add clinit checks
-    // to calls that satisfy this subclass check with any inlined methods. This
-    // will be detected by the optimization passes.
+  if (IsInitialized(resolved_method_class)) {
     *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
   } else if (storage_index != DexFile::kDexNoIndex) {
-    // If the method's class type index is available, check
-    // whether we should add an explicit class initialization
-    // check for its declaring class before the static method call.
-
-    // TODO: find out why this check is needed.
-    bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache(
-        *outer_compilation_unit_->GetDexFile(), storage_index);
-    bool is_initialized =
-        resolved_method->GetDeclaringClass()->IsInitialized() && is_in_dex_cache;
-
-    if (is_initialized) {
-      *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
-    } else {
-      *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
-      HLoadClass* load_class = new (arena_) HLoadClass(
-          graph_->GetCurrentMethod(),
-          storage_index,
-          *dex_compilation_unit_->GetDexFile(),
-          is_outer_class,
-          dex_pc,
-          /*needs_access_check*/ false);
-      current_block_->AddInstruction(load_class);
-      clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
-      current_block_->AddInstruction(clinit_check);
-    }
+    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
+    HLoadClass* load_class = new (arena_) HLoadClass(
+        graph_->GetCurrentMethod(),
+        storage_index,
+        outer_dex_file,
+        is_outer_class,
+        dex_pc,
+        /*needs_access_check*/ false,
+        compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index));
+    current_block_->AddInstruction(load_class);
+    clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
+    current_block_->AddInstruction(clinit_check);
   }
   return clinit_check;
 }
@@ -1272,7 +1462,7 @@
   uint16_t field_index = instruction.VRegB_21c();
 
   ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<4> hs(soa.Self());
+  StackHandleScope<5> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
       dex_compilation_unit_->GetClassLinker()->FindDexCache(
           soa.Self(), *dex_compilation_unit_->GetDexFile())));
@@ -1318,26 +1508,26 @@
     }
   }
 
-  // TODO: find out why this check is needed.
-  bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache(
-      *outer_compilation_unit_->GetDexFile(), storage_index);
-  bool is_initialized = resolved_field->GetDeclaringClass()->IsInitialized() && is_in_dex_cache;
-
+  bool is_in_cache =
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index);
   HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(),
                                                  storage_index,
-                                                 *dex_compilation_unit_->GetDexFile(),
+                                                 outer_dex_file,
                                                  is_outer_class,
                                                  dex_pc,
-                                                 /*needs_access_check*/ false);
+                                                 /*needs_access_check*/ false,
+                                                 is_in_cache);
   current_block_->AddInstruction(constant);
 
   HInstruction* cls = constant;
-  if (!is_initialized && !is_outer_class) {
+
+  Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass()));
+  if (!IsInitialized(klass)) {
     cls = new (arena_) HClinitCheck(constant, dex_pc);
     current_block_->AddInstruction(cls);
   }
 
-  uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
+  uint16_t class_def_index = klass->GetDexClassDefIndex();
   if (is_put) {
     // We need to keep the class alive before loading the value.
     Temporaries temps(graph_);
@@ -1601,19 +1791,20 @@
 
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScope<2> hs(soa.Self());
+  const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
   Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
 
   HInstruction* object = LoadLocal(reference, Primitive::kPrimNot, dex_pc);
   HLoadClass* cls = new (arena_) HLoadClass(
       graph_->GetCurrentMethod(),
       type_index,
-      *dex_compilation_unit_->GetDexFile(),
+      dex_file,
       IsOutermostCompilingClass(type_index),
       dex_pc,
-      !can_access);
+      !can_access,
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_file, type_index));
   current_block_->AddInstruction(cls);
 
   // The class needs a temporary before being used by the type check.
@@ -2509,20 +2700,9 @@
         current_block_->AddInstruction(fake_string);
         UpdateLocal(register_index, fake_string, dex_pc);
       } else {
-        bool finalizable;
-        bool can_throw = NeedsAccessCheck(type_index, &finalizable);
-        QuickEntrypointEnum entrypoint = can_throw
-            ? kQuickAllocObjectWithAccessCheck
-            : kQuickAllocObject;
-
-        current_block_->AddInstruction(new (arena_) HNewInstance(
-            graph_->GetCurrentMethod(),
-            dex_pc,
-            type_index,
-            *dex_compilation_unit_->GetDexFile(),
-            can_throw,
-            finalizable,
-            entrypoint));
+        if (!BuildNewInstance(type_index, dex_pc)) {
+          return false;
+        }
         UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
       }
       break;
@@ -2722,15 +2902,21 @@
     }
 
     case Instruction::CONST_STRING: {
+      uint32_t string_index = instruction.VRegB_21c();
+      bool in_dex_cache = compiler_driver_->CanAssumeStringIsPresentInDexCache(
+          *dex_file_, string_index);
       current_block_->AddInstruction(
-          new (arena_) HLoadString(graph_->GetCurrentMethod(), instruction.VRegB_21c(), dex_pc));
+          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, dex_pc, in_dex_cache));
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc);
       break;
     }
 
     case Instruction::CONST_STRING_JUMBO: {
+      uint32_t string_index = instruction.VRegB_31c();
+      bool in_dex_cache = compiler_driver_->CanAssumeStringIsPresentInDexCache(
+          *dex_file_, string_index);
       current_block_->AddInstruction(
-          new (arena_) HLoadString(graph_->GetCurrentMethod(), instruction.VRegB_31c(), dex_pc));
+          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, dex_pc, in_dex_cache));
       UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction(), dex_pc);
       break;
     }
@@ -2750,10 +2936,11 @@
       current_block_->AddInstruction(new (arena_) HLoadClass(
           graph_->GetCurrentMethod(),
           type_index,
-          *dex_compilation_unit_->GetDexFile(),
+          *dex_file_,
           IsOutermostCompilingClass(type_index),
           dex_pc,
-          !can_access));
+          !can_access,
+          compiler_driver_->CanAssumeTypeIsPresentInDexCache(*dex_file_, type_index)));
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc);
       break;
     }
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index f857ef0..26bf1cb 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -80,7 +80,8 @@
         can_use_baseline_for_string_init_(true),
         compilation_stats_(nullptr),
         interpreter_metadata_(nullptr),
-        dex_cache_(NullHandle<mirror::DexCache>()) {}
+        null_dex_cache_(),
+        dex_cache_(null_dex_cache_) {}
 
   bool BuildGraph(const DexFile::CodeItem& code);
 
@@ -90,8 +91,9 @@
 
   static constexpr const char* kBuilderPassName = "builder";
 
-  // The number of entries in a packed switch before we use a jump table.
-  static constexpr uint16_t kSmallSwitchThreshold = 5;
+  // The number of entries in a packed switch before we use a jump table or a specialized
+  // compare/jump series.
+  static constexpr uint16_t kSmallSwitchThreshold = 3;
 
  private:
   // Analyzes the dex instruction and adds HInstruction to the graph
@@ -110,6 +112,7 @@
                             const uint16_t* end,
                             size_t* number_of_branches);
   void MaybeUpdateCurrentBlock(size_t dex_pc);
+  void FindNativeDebugInfoLocations(const DexFile::CodeItem& code_item, ArenaBitVector* locations);
   HBasicBlock* FindBlockStartingAt(int32_t dex_pc) const;
   HBasicBlock* FindOrCreateBlockStartingAt(int32_t dex_pc);
 
@@ -305,8 +308,21 @@
 
   HClinitCheck* ProcessClinitCheckForInvoke(
       uint32_t dex_pc,
+      ArtMethod* method,
       uint32_t method_idx,
-      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement);
+      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Build an HNewInstance instruction.
+  bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc);
+
+  // Return whether the compiler can assume `cls` is initialized.
+  bool IsInitialized(Handle<mirror::Class> cls) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Try to resolve a method using the class linker. Returns null if the method
+  // could not be resolved.
+  ArtMethod* ResolveMethod(uint16_t method_idx, InvokeType invoke_type);
 
   ArenaAllocator* const arena_;
 
@@ -357,6 +373,7 @@
   const uint8_t* interpreter_metadata_;
 
   // Dex cache for dex_file_.
+  ScopedNullHandle<mirror::DexCache> null_dex_cache_;
   Handle<mirror::DexCache> dex_cache_;
 
   DISALLOW_COPY_AND_ASSIGN(HGraphBuilder);
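
The null_dex_cache_ member added above exists because a Handle is a non-owning view: initializing dex_cache_ from a temporary null handle would leave it referring to dead storage. A minimal sketch of the lifetime pattern, with simplified stand-ins rather than ART's actual Handle/ScopedNullHandle types:

    // A Handle refers to a slot owned elsewhere, so the slot must outlive it.
    template <typename T>
    class Handle {
     public:
      explicit Handle(T* const* slot) : slot_(slot) {}
      T* Get() const { return *slot_; }
     private:
      T* const* slot_;
    };

    // Owns a slot that is always null and hands out views of it. Stored as a
    // member (null_dex_cache_), the slot lives exactly as long as the object
    // whose Handle member (dex_cache_) views it; a temporary would not.
    template <typename T>
    class ScopedNullHandle {
     public:
      operator Handle<T>() const { return Handle<T>(&slot_); }
     private:
      T* slot_ = nullptr;
    };
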
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 77d53fc..53d3615 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -246,10 +246,7 @@
         InitLocationsBaseline(current);
       }
       DCHECK(CheckTypeConsistency(current));
-      uintptr_t native_pc_begin = GetAssembler()->CodeSize();
       current->Accept(instruction_visitor);
-      uintptr_t native_pc_end = GetAssembler()->CodeSize();
-      RecordNativeDebugInfo(current->GetDexPc(), native_pc_begin, native_pc_end);
     }
   }
 
@@ -383,11 +380,11 @@
     HInvokeStaticOrDirect* call = invoke->AsInvokeStaticOrDirect();
     switch (call->GetMethodLoadKind()) {
       case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-        locations->SetInAt(call->GetCurrentMethodInputIndex(), visitor->GetMethodLocation());
+        locations->SetInAt(call->GetSpecialInputIndex(), visitor->GetMethodLocation());
         break;
       case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
         locations->AddTemp(visitor->GetMethodLocation());
-        locations->SetInAt(call->GetCurrentMethodInputIndex(), Location::RequiresRegister());
+        locations->SetInAt(call->GetSpecialInputIndex(), Location::RequiresRegister());
         break;
       default:
         locations->AddTemp(visitor->GetMethodLocation());
@@ -926,17 +923,6 @@
   stack_map_stream_.FillIn(region);
 }
 
-void CodeGenerator::RecordNativeDebugInfo(uint32_t dex_pc,
-                                          uintptr_t native_pc_begin,
-                                          uintptr_t native_pc_end) {
-  if (compiler_options_.GetGenerateDebugInfo() &&
-      dex_pc != kNoDexPc &&
-      native_pc_begin != native_pc_end) {
-    src_map_.push_back(SrcMapElem({static_cast<uint32_t>(native_pc_begin),
-                                   static_cast<int32_t>(dex_pc)}));
-  }
-}
-
 void CodeGenerator::RecordPcInfo(HInstruction* instruction,
                                  uint32_t dex_pc,
                                  SlowPathCode* slow_path) {
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 114d97b..eade05d 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -269,8 +269,6 @@
 
   // Record native to dex mapping for a suspend point.  Required by runtime.
   void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
-  // Record additional native to dex mappings for native debugging/profiling tools.
-  void RecordNativeDebugInfo(uint32_t dex_pc, uintptr_t native_pc_begin, uintptr_t native_pc_end);
 
   bool CanMoveNullCheckToUser(HNullCheck* null_check);
   void MaybeRecordImplicitNullCheck(HInstruction* instruction);
@@ -452,10 +450,6 @@
   // Copy the result of a call into the given target.
   virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;
 
-  const ArenaVector<SrcMapElem>& GetSrcMappingTable() const {
-    return src_map_;
-  }
-
  protected:
   // Method patch info used for recording locations of required linker patches and
   // target methods. The target method can be used for various purposes, whether for
@@ -498,7 +492,6 @@
         stats_(stats),
         graph_(graph),
         compiler_options_(compiler_options),
-        src_map_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         current_slow_path_(nullptr),
         current_block_index_(0),
@@ -616,8 +609,6 @@
   HGraph* const graph_;
   const CompilerOptions& compiler_options_;
 
-  // Native to dex_pc map used for native debugging/profiling tools.
-  ArenaVector<SrcMapElem> src_map_;
   ArenaVector<SlowPathCode*> slow_paths_;
 
   // The current slow path that we're generating code for.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 12ab68e..58feb67 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -59,7 +59,7 @@
 // S registers. Therefore there is no need to block it.
 static constexpr DRegister DTMP = D31;
 
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
 #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
@@ -77,6 +77,7 @@
     }
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
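
The CheckEntrypointTypes<...>() calls added next to InvokeRuntime() throughout this patch assert, at build time, the signature each call site expects its runtime entry point to have (the trailing `void` in calls such as `<kQuickThrowNullPointer, void, void>` spells an empty argument list). ART's actual mechanism declares the template and defines only the correct specializations, so a mismatch fails the build; the following is an illustrative stand-in using a static_assert, with a toy enum and a hypothetical signature trait:

    #include <type_traits>

    enum QuickEntrypoint { kQuickThrowNullPointer, kQuickThrowDivZero };

    // Hypothetical per-entrypoint signature trait (not ART's declaration).
    template <QuickEntrypoint kEntry> struct EntrypointSignature;
    template <> struct EntrypointSignature<kQuickThrowNullPointer> { using type = void(); };
    template <> struct EntrypointSignature<kQuickThrowDivZero> { using type = void(); };

    // A call site names the return and argument types it expects; a mismatch
    // becomes a compile error instead of silently corrupted runtime state.
    template <QuickEntrypoint kEntry, typename R, typename... Args>
    void CheckEntrypointTypes() {
      static_assert(std::is_same<typename EntrypointSignature<kEntry>::type, R(Args...)>::value,
                    "Entrypoint called with an unexpected signature");
    }

    int main() {
      CheckEntrypointTypes<kQuickThrowNullPointer, void>();  // Matches void().
    }
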
@@ -101,6 +102,7 @@
     }
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -123,6 +125,7 @@
     SaveLiveRegisters(codegen, instruction_->GetLocations());
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
     RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ b(GetReturnLabel());
@@ -179,6 +182,7 @@
         Primitive::kPrimInt);
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -214,6 +218,11 @@
         ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
         : QUICK_ENTRY_POINT(pInitializeType);
     arm_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this);
+    if (do_clinit_) {
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+    } else {
+      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+    }
 
     // Move the class to the desired location.
     Location out = locations->Out();
@@ -260,6 +269,7 @@
     __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex());
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
     arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
 
     RestoreLiveRegisters(codegen, locations);
@@ -351,6 +361,7 @@
     uint32_t dex_pc = deoptimize->GetDexPc();
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; }
@@ -393,6 +404,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
     RestoreLiveRegisters(codegen, locations);
     __ b(GetExitLabel());
   }
@@ -712,7 +724,9 @@
                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       call_patches_(MethodReferenceComparator(),
                     graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      dex_cache_arrays_base_labels_(std::less<HArmDexCacheArraysBase*>(),
+                                    graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
 }
@@ -1604,7 +1618,15 @@
                         /* false_target */ nullptr);
 }
 
-void LocationsBuilderARM::VisitCondition(HCondition* cond) {
+void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
+void LocationsBuilderARM::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
   // Handle the long/FP comparisons made in instruction simplification.
@@ -1635,7 +1657,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM::VisitCondition(HCondition* cond) {
+void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) {
   if (!cond->NeedsMaterialization()) {
     return;
   }
@@ -1692,83 +1714,83 @@
 }
 
 void LocationsBuilderARM::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitLocal(HLocal* local) {
@@ -1910,10 +1932,18 @@
                                          codegen_->GetAssembler(),
                                          codegen_->GetInstructionSetFeatures());
   if (intrinsic.TryDispatch(invoke)) {
+    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+    }
     return;
   }
 
   HandleInvoke(invoke);
+
+  // For PC-relative dex cache, the invoke has an extra input: the PC-relative address base.
+  if (invoke->HasPcRelativeDexCache()) {
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
+  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) {
@@ -2410,6 +2440,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickF2l, int64_t, float>();
           break;
 
         case Primitive::kPrimDouble:
@@ -2418,6 +2449,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickD2l, int64_t, double>();
           break;
 
         default:
@@ -2463,6 +2495,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickL2f, float, int64_t>();
           break;
 
         case Primitive::kPrimDouble:
@@ -2985,6 +3018,7 @@
         DCHECK_EQ(R0, out.AsRegister<Register>());
 
         codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), div, div->GetDexPc(), nullptr);
+        CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
       }
       break;
     }
@@ -2999,6 +3033,7 @@
       DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>());
 
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc(), nullptr);
+      CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
       break;
     }
 
@@ -3127,22 +3162,26 @@
         DCHECK_EQ(R1, out.AsRegister<Register>());
 
         codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), rem, rem->GetDexPc(), nullptr);
+        CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
       }
       break;
     }
 
     case Primitive::kPrimLong: {
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc(), nullptr);
+      CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
       break;
     }
 
     case Primitive::kPrimFloat: {
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc(), nullptr);
+      CheckEntrypointTypes<kQuickFmodf, float, float, float>();
       break;
     }
 
     case Primitive::kPrimDouble: {
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc(), nullptr);
+      CheckEntrypointTypes<kQuickFmod, double, double, double>();
       break;
     }
 
@@ -3203,6 +3242,147 @@
   }
 }
 
+void InstructionCodeGeneratorARM::HandleIntegerRotate(LocationSummary* locations) {
+  Register in = locations->InAt(0).AsRegister<Register>();
+  Location rhs = locations->InAt(1);
+  Register out = locations->Out().AsRegister<Register>();
+
+  if (rhs.IsConstant()) {
+    // Arm32 and Thumb2 assemblers require a rotation in the interval [1,31],
+    // so map all rotations to a positive equivalent in that range.
+    // (e.g. a rotation of -2 bits, i.e. left by 2, equals a right rotation by 30.)
+    uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()) & 0x1F;
+    if (rot) {
+      // Rotate, mapping left rotations to right equivalents if necessary.
+      // (e.g. left by 2 bits == right by 30.)
+      __ Ror(out, in, rot);
+    } else if (out != in) {
+      __ Mov(out, in);
+    }
+  } else {
+    __ Ror(out, in, rhs.AsRegister<Register>());
+  }
+}
+
+// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer rotates:
+// swapping the input regs effectively rotates by the first 32 bits of a larger rotation,
+// and flipping the direction treats larger right/left rotations as sub-word sized
+// rotations in the other direction.
+void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
+  Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+  Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+  Location rhs = locations->InAt(1);
+  Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
+  Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+  if (rhs.IsConstant()) {
+    uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant());
+    // Map all rotations to positive equivalents in the interval [0,63].
+    rot &= kMaxLongShiftValue;
+    // For rotates over a word in size, 'pre-rotate' by 32 bits by swapping the input
+    // registers, which keeps the rotate logic below to a simple pair of shift/orr steps.
+    // (e.g. a 34-bit rotate == an input register swap + a 2-bit rotate right.)
+    if (rot >= kArmBitsPerWord) {
+      rot -= kArmBitsPerWord;
+      std::swap(in_reg_hi, in_reg_lo);
+    }
+    // Rotate, or mov to out for zero or word size rotations.
+    if (rot != 0u) {
+      __ Lsr(out_reg_hi, in_reg_hi, rot);
+      __ orr(out_reg_hi, out_reg_hi, ShifterOperand(in_reg_lo, arm::LSL, kArmBitsPerWord - rot));
+      __ Lsr(out_reg_lo, in_reg_lo, rot);
+      __ orr(out_reg_lo, out_reg_lo, ShifterOperand(in_reg_hi, arm::LSL, kArmBitsPerWord - rot));
+    } else {
+      __ Mov(out_reg_lo, in_reg_lo);
+      __ Mov(out_reg_hi, in_reg_hi);
+    }
+  } else {
+    Register shift_right = locations->GetTemp(0).AsRegister<Register>();
+    Register shift_left = locations->GetTemp(1).AsRegister<Register>();
+    Label end;
+    Label shift_by_32_plus_shift_right;
+
+    __ and_(shift_right, rhs.AsRegister<Register>(), ShifterOperand(0x1F));
+    __ Lsrs(shift_left, rhs.AsRegister<Register>(), 6);
+    __ rsb(shift_left, shift_right, ShifterOperand(kArmBitsPerWord), AL, kCcKeep);
+    __ b(&shift_by_32_plus_shift_right, CC);
+
+    // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
+    // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
+    __ Lsl(out_reg_hi, in_reg_hi, shift_left);
+    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
+    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+    __ Lsr(shift_left, in_reg_hi, shift_right);
+    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left));
+    __ b(&end);
+
+    __ Bind(&shift_by_32_plus_shift_right);  // Shift by 32+shift_right.
+    // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
+    // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
+    __ Lsr(out_reg_hi, in_reg_hi, shift_right);
+    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
+    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+    __ Lsl(shift_right, in_reg_hi, shift_left);
+    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right));
+
+    __ Bind(&end);
+  }
+}
+
+void LocationsBuilderARM::HandleRotate(HRor* ror) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    }
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      if (ror->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant()));
+      } else {
+        locations->SetInAt(1, Location::RequiresRegister());
+        locations->AddTemp(Location::RequiresRegister());
+        locations->AddTemp(Location::RequiresRegister());
+      }
+      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorARM::HandleRotate(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
+  Primitive::Type type = ror->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+      HandleIntegerRotate(locations);
+      break;
+    }
+    case Primitive::kPrimLong: {
+      HandleLongRotate(locations);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << type;
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM::VisitRor(HRor* op) {
+  HandleRotate(op);
+}
+
+void InstructionCodeGeneratorARM::VisitRor(HRor* op) {
+  HandleRotate(op);
+}
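
The constant case of HandleLongRotate above never needs more than a register swap plus a sub-word rotation. The following is a self-contained reference model of that decomposition, in plain C++ rather than ART code, useful for checking the emitted Lsr/Lsl/orr pairs against an obviously correct computation:

    #include <cstdint>
    #include <utility>

    // Models the constant case of HandleLongRotate: reduce the amount to
    // [0,63], swap the 32-bit halves for amounts >= 32 ('pre-rotate' by a
    // word), then rotate each half right and OR in the bits that crossed
    // over from the other half, exactly as the emitted instruction pairs do.
    uint64_t RotateRight64(uint32_t in_lo, uint32_t in_hi, uint64_t rot) {
      rot &= 63u;        // kMaxLongShiftValue: map all rotations to [0,63].
      if (rot >= 32u) {  // e.g. 34 bits == half swap + 2 bits right.
        rot -= 32u;
        std::swap(in_lo, in_hi);
      }
      uint32_t out_lo = in_lo;
      uint32_t out_hi = in_hi;
      if (rot != 0u) {   // Guard: a shift by 32 - 0 would be undefined in C++.
        out_hi = (in_hi >> rot) | (in_lo << (32u - rot));
        out_lo = (in_lo >> rot) | (in_hi << (32u - rot));
      }
      return (uint64_t{out_hi} << 32) | out_lo;
    }

For example, RotateRight64(0x00000001u, 0x00000000u, 1) yields 0x8000000000000000, matching a 64-bit ROR by one.
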
+
 void LocationsBuilderARM::HandleShift(HBinaryOperation* op) {
   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
 
@@ -3361,7 +3541,19 @@
             __ mov(o_l, ShifterOperand(high));
             __ LoadImmediate(o_h, 0);
           }
-        } else {  // shift_value < 32
+        } else if (shift_value == 1) {
+          if (op->IsShl()) {
+            __ Lsls(o_l, low, 1);
+            __ adc(o_h, high, ShifterOperand(high));
+          } else if (op->IsShr()) {
+            __ Asrs(o_h, high, 1);
+            __ Rrx(o_l, low);
+          } else {
+            __ Lsrs(o_h, high, 1);
+            __ Rrx(o_l, low);
+          }
+        } else {
+          DCHECK(2 <= shift_value && shift_value < 32) << shift_value;
           if (op->IsShl()) {
             __ Lsl(o_h, high, shift_value);
             __ orr(o_h, o_h, ShifterOperand(low, LSR, 32 - shift_value));
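
For a 64-bit shift by exactly one bit, the new branch above routes the crossing bit through the carry flag (Lsls + adc for SHL, Asrs/Lsrs + Rrx for SHR/USHR) instead of emitting the general three-instruction sequence. A small reference model of the shift-left case, in plain C++ under the stated reading of the flags:

    #include <cstdint>

    // Models `Lsls o_l, low, #1; adc o_h, high, ShifterOperand(high)`:
    // LSLS shifts the low word left by one and leaves the displaced bit 31
    // in the carry flag; ADC computes high + high + carry, i.e. it doubles
    // the high word and pulls the carried bit in as its new bit 0.
    uint64_t Shl64By1(uint32_t low, uint32_t high) {
      uint32_t carry = low >> 31;             // The bit LSLS shifts out.
      uint32_t out_lo = low << 1;
      uint32_t out_hi = high + high + carry;
      return (uint64_t{out_hi} << 32) | out_lo;
    }

The right-shift cases are symmetrical: the flag-setting shift of the high word leaves its bit 0 in the carry, and RRX shifts the low word right with the carry entering as bit 31.
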
@@ -3413,20 +3605,19 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(Location::RegisterLocation(R0));
 }
 
 void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) {
-  InvokeRuntimeCallingConvention calling_convention;
-  __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
 }
 
 void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) {
@@ -3448,6 +3639,7 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
 }
 
 void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) {
@@ -4320,7 +4512,7 @@
   if (needs_write_barrier) {
     // Temporary registers for the write barrier.
     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
-    locations->AddTemp(Location::RequiresRegister());  // Possibly used for read barrier too.
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
@@ -4937,6 +5129,7 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
@@ -4958,7 +5151,6 @@
       __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset);
     }
   } else {
-    DCHECK(cls->CanCallRuntime());
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ LoadFromOffset(kLoadWord,
@@ -4977,14 +5169,19 @@
       __ LoadFromOffset(kLoadWord, out, out, cache_offset);
     }
 
-    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
-        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
-    } else {
-      __ Bind(slow_path->GetExitLabel());
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
+          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+      if (!cls->IsInDexCache()) {
+        __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
+      }
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
@@ -5019,16 +5216,15 @@
 }
 
 void LocationsBuilderARM::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
-  codegen_->AddSlowPath(slow_path);
-
   LocationSummary* locations = load->GetLocations();
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
@@ -5059,8 +5255,12 @@
     __ LoadFromOffset(kLoadWord, out, out, cache_offset);
   }
 
-  __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
+  if (!load->IsInDexCache()) {
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
+    codegen_->AddSlowPath(slow_path);
+    __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 static int32_t GetExceptionTlsOffset() {
@@ -5097,6 +5297,7 @@
 void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) {
   codegen_->InvokeRuntime(
       QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr);
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
 void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
@@ -5537,6 +5738,11 @@
       instruction,
       instruction->GetDexPc(),
       nullptr);
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderARM::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction, AND); }
@@ -5774,16 +5980,6 @@
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method) {
-  if (desired_dispatch_info.method_load_kind ==
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative) {
-    // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
-    return HInvokeStaticOrDirect::DispatchInfo {
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
-      HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-      0u,
-      0u
-    };
-  }
   if (desired_dispatch_info.code_ptr_location ==
       HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) {
     const DexFile& outer_dex_file = GetGraph()->GetDexFile();
@@ -5806,6 +6002,32 @@
   return desired_dispatch_info;
 }
 
+Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+                                                                 Register temp) {
+  DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+  if (!invoke->GetLocations()->Intrinsified()) {
+    return location.AsRegister<Register>();
+  }
+  // For intrinsics we allow any location, so it may be on the stack.
+  if (!location.IsRegister()) {
+    __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex());
+    return temp;
+  }
+  // For register locations, check if the register was saved. If so, get it from the stack.
+  // Note: There is a chance that the register was saved but not overwritten, so we could
+  // save one load. However, since this is just an intrinsic slow path we prefer this
+  // simple and more robust approach rather than trying to determine if that's the case.
+  SlowPathCode* slow_path = GetCurrentSlowPath();
+  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
+  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+    __ LoadFromOffset(kLoadWord, temp, SP, stack_offset);
+    return temp;
+  }
+  return location.AsRegister<Register>();
+}
+
 void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   // For better instruction scheduling we load the direct code pointer before the method pointer.
   switch (invoke->GetCodePtrLocation()) {
@@ -5828,7 +6050,7 @@
       __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, invoke->GetStringInitOffset());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress());
@@ -5837,13 +6059,17 @@
       __ LoadLiteral(temp.AsRegister<Register>(),
                      DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement this type.
-      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
-      LOG(FATAL) << "Unsupported";
-      UNREACHABLE();
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+      HArmDexCacheArraysBase* base =
+          invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase();
+      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+                                                                temp.AsRegister<Register>());
+      int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset();
+      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register method_reg;
       Register reg = temp.AsRegister<Register>();
       if (current_method.IsRegister()) {
@@ -5899,12 +6125,16 @@
   Register temp = temp_location.AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  DCHECK(receiver.IsRegister());
   // /* HeapReference<Class> */ temp = receiver->klass_
-  __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
+  __ LoadFromOffset(kLoadWord, temp, receiver, class_offset);
   MaybeRecordImplicitNullCheck(invoke);
   // Instead of simply (possibly) unpoisoning `temp` here, we should
   // emit a read barrier for the previous class reference load.
@@ -5926,7 +6156,11 @@
 
 void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
-  size_t size = method_patches_.size() + call_patches_.size() + relative_call_patches_.size();
+  size_t size =
+      method_patches_.size() +
+      call_patches_.size() +
+      relative_call_patches_.size() +
+      /* MOVW+MOVT for each base */ 2u * dex_cache_arrays_base_labels_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
     const MethodReference& target_method = entry.first;
@@ -5952,6 +6186,28 @@
                                                              info.target_method.dex_file,
                                                              info.target_method.dex_method_index));
   }
+  for (const auto& pair : dex_cache_arrays_base_labels_) {
+    HArmDexCacheArraysBase* base = pair.first;
+    const DexCacheArraysBaseLabels* labels = &pair.second;
+    const DexFile& dex_file = base->GetDexFile();
+    size_t base_element_offset = base->GetElementOffset();
+    DCHECK(labels->add_pc_label.IsBound());
+    uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(labels->add_pc_label.Position());
+    // Add MOVW patch.
+    DCHECK(labels->movw_label.IsBound());
+    uint32_t movw_offset = dchecked_integral_cast<uint32_t>(labels->movw_label.Position());
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movw_offset,
+                                                              &dex_file,
+                                                              add_pc_offset,
+                                                              base_element_offset));
+    // Add MOVT patch.
+    DCHECK(labels->movt_label.IsBound());
+    uint32_t movt_offset = dchecked_integral_cast<uint32_t>(labels->movt_label.Position());
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movt_offset,
+                                                              &dex_file,
+                                                              add_pc_offset,
+                                                              base_element_offset));
+  }
 }
 
 Literal* CodeGeneratorARM::DeduplicateMethodLiteral(MethodReference target_method,
@@ -6002,7 +6258,7 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
-  if (switch_instr->GetNumEntries() >= kPackedSwitchJumpTableThreshold &&
+  if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
       codegen_->GetAssembler()->IsThumb()) {
     locations->AddTemp(Location::RequiresRegister());  // We need a temp for the table base.
     if (switch_instr->GetStartValue() != 0) {
@@ -6018,12 +6274,30 @@
   Register value_reg = locations->InAt(0).AsRegister<Register>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  if (num_entries < kPackedSwitchJumpTableThreshold || !codegen_->GetAssembler()->IsThumb()) {
+  if (num_entries <= kPackedSwitchCompareJumpThreshold || !codegen_->GetAssembler()->IsThumb()) {
     // Create a series of compare/jumps.
+    Register temp_reg = IP;
+    // Note: It is fine for the AddConstantSetFlags() below to use the IP register to
+    // temporarily store the immediate, because IP is used as its destination register.
+    // For the other AddConstantSetFlags() and GenerateCompareWithImmediate() calls, the
+    // immediate values are constant, and they can be encoded in the instruction without IP.
+    __ AddConstantSetFlags(temp_reg, value_reg, -lower_bound);
+
     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-    for (uint32_t i = 0; i < num_entries; i++) {
-      GenerateCompareWithImmediate(value_reg, lower_bound + i);
-      __ b(codegen_->GetLabelOf(successors[i]), EQ);
+    // Jump to successors[0] if value == lower_bound.
+    __ b(codegen_->GetLabelOf(successors[0]), EQ);
+    int32_t last_index = 0;
+    for (; num_entries - last_index > 2; last_index += 2) {
+      __ AddConstantSetFlags(temp_reg, temp_reg, -2);
+      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+      __ b(codegen_->GetLabelOf(successors[last_index + 1]), LO);
+      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+      __ b(codegen_->GetLabelOf(successors[last_index + 2]), EQ);
+    }
+    if (num_entries - last_index == 2) {
+      // Handle the last remaining case value.
+      GenerateCompareWithImmediate(temp_reg, 1);
+      __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ);
     }
 
     // And the default for any other value.
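
The ladder above folds the lower bound into a single flag-setting subtraction and then dispatches two cases per ADDS #-2, reading both the LO (unsigned borrow) and EQ conditions from one update of the flags. A self-contained model of the emitted control flow (plain C++; the returned index stands for successors[i], and -1 stands for the default block):

    #include <cstdint>

    // Follows the emitted sequence: ADDS temp, value, #-lower_bound; BEQ s[0];
    // then per pair: ADDS temp, temp, #-2; BLO s[i+1]; BEQ s[i+2]. LO after a
    // flag-setting subtract means an unsigned borrow, i.e. temp was 0 or 1,
    // and temp == 0 was dispatched by the preceding BEQ, so borrow => s[i+1].
    int32_t SelectPackedSwitchCase(int32_t value, int32_t lower_bound, uint32_t num_entries) {
      uint32_t temp = static_cast<uint32_t>(value) - static_cast<uint32_t>(lower_bound);
      if (temp == 0u) return 0;                  // BEQ successors[0].
      uint32_t last_index = 0;
      for (; num_entries - last_index > 2u; last_index += 2u) {
        bool borrow = temp < 2u;                 // LO condition after ADDS #-2.
        temp -= 2u;
        if (borrow) return last_index + 1;       // BLO successors[last_index + 1].
        if (temp == 0u) return last_index + 2;   // BEQ successors[last_index + 2].
      }
      if (num_entries - last_index == 2u && temp == 1u) {
        return last_index + 1;                   // CMP temp, #1; BEQ (odd tail case).
      }
      return -1;                                 // Fall through to the default block.
    }

Values below lower_bound wrap to a large unsigned temp and fall through to the default block, matching how the hardware flags behave.
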
@@ -6063,6 +6337,23 @@
   }
 }
 
+void LocationsBuilderARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
+  locations->SetOut(Location::RequiresRegister());
+  codegen_->AddDexCacheArraysBase(base);
+}
+
+void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+  Register base_reg = base->GetLocations()->Out().AsRegister<Register>();
+  CodeGeneratorARM::DexCacheArraysBaseLabels* labels = codegen_->GetDexCacheArraysBaseLabels(base);
+  __ BindTrackedLabel(&labels->movw_label);
+  __ movw(base_reg, 0u);
+  __ BindTrackedLabel(&labels->movt_label);
+  __ movt(base_reg, 0u);
+  __ BindTrackedLabel(&labels->add_pc_label);
+  __ add(base_reg, base_reg, ShifterOperand(PC));
+}
+
 void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   if (!trg.IsValid()) {
     DCHECK(type == Primitive::kPrimVoid);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 89de4f8..b7c58e1 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -170,6 +170,10 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode);
+  void HandleCondition(HCondition* condition);
+  void HandleIntegerRotate(LocationSummary* locations);
+  void HandleLongRotate(LocationSummary* locations);
+  void HandleRotate(HRor* ror);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -213,6 +217,10 @@
   void GenerateOrrConst(Register out, Register first, uint32_t value);
   void GenerateEorConst(Register out, Register first, uint32_t value);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleCondition(HCondition* condition);
+  void HandleIntegerRotate(LocationSummary* locations);
+  void HandleLongRotate(LocationSummary* locations);
+  void HandleRotate(HRor* ror);
   void HandleShift(HBinaryOperation* operation);
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateWideAtomicStore(Register addr, uint32_t offset,
@@ -373,6 +381,31 @@
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  // The PC-relative base address is loaded with three instructions, MOVW+MOVT
+  // to load the offset into base_reg and then ADD base_reg, PC. The offset is
+  // calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we
+  // currently emit these 3 instructions together, instruction scheduling could
+  // split this sequence apart, so we keep separate labels for each of them.
+  struct DexCacheArraysBaseLabels {
+    DexCacheArraysBaseLabels() = default;
+    DexCacheArraysBaseLabels(DexCacheArraysBaseLabels&& other) = default;
+
+    Label movw_label;
+    Label movt_label;
+    Label add_pc_label;
+  };
+
+  void AddDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+    DexCacheArraysBaseLabels labels;
+    dex_cache_arrays_base_labels_.Put(base, std::move(labels));
+  }
+
+  DexCacheArraysBaseLabels* GetDexCacheArraysBaseLabels(HArmDexCacheArraysBase* base) {
+    auto it = dex_cache_arrays_base_labels_.find(base);
+    DCHECK(it != dex_cache_arrays_base_labels_.end());
+    return &it->second;
+  }
+
   // Generate a read barrier for a heap reference within `instruction`.
   //
   // A read barrier for an object reference read from the heap is
@@ -419,7 +452,12 @@
   void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
 
  private:
+  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+
   using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
+  using DexCacheArraysBaseToLabelsMap = ArenaSafeMap<HArmDexCacheArraysBase*,
+                                                     DexCacheArraysBaseLabels,
+                                                     std::less<HArmDexCacheArraysBase*>>;
 
   Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
   Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
@@ -441,6 +479,8 @@
   // Using ArenaDeque<> which retains element addresses on push/emplace_back().
   ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
 
+  DexCacheArraysBaseToLabelsMap dex_cache_arrays_base_labels_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
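
Relating the labels above to the patch arithmetic in EmitLinkerPatches: both the MOVW and the MOVT record the same anchor (the ADD's position), and the linker writes the two halves of one displacement. A sketch of that quantity under the assumptions stated in the struct comment; the function name and parameters here are illustrative, not ART's linker API:

    #include <cstdint>

    // Hypothetical mirror of the DexCacheArrayPatch computation: the MOVW/MOVT
    // pair receives the displacement from the ADD's effective PC (instruction
    // address + 4 in Thumb2 state) to `element_offset` bytes into the dex
    // cache arrays, so `ADD base_reg, PC` leaves base_reg pointing there.
    uint32_t DexCacheArraysBaseDisplacement(uint32_t dex_cache_arrays_start,
                                            uint32_t element_offset,
                                            uint32_t add_pc_insn_address) {
      uint32_t effective_pc = add_pc_insn_address + 4u;
      return dex_cache_arrays_start + element_offset - effective_pc;
    }

Each use then compensates for the folded-in element offset, which is why GenerateStaticOrDirectCall loads from `invoke->GetDexCacheArrayOffset() - base->GetElementOffset()` relative to base_reg.
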
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index d1bddf6..b49f42b 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -42,6 +42,9 @@
 
 namespace art {
 
+template<class MirrorType>
+class GcRoot;
+
 namespace arm64 {
 
 using helpers::CPURegisterFrom;
@@ -68,10 +71,10 @@
 using helpers::ArtVixlRegCodeCoherentForRegSet;
 
 static constexpr int kCurrentMethodStackOffset = 0;
-// The compare/jump sequence will generate about (2 * num_entries + 1) instructions. While jump
+// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the
 // jump table version generates 7 instructions and num_entries literals. The compare/jump
 // sequence generates less code/data for a small num_entries; at num_entries == 7 the two are
 // roughly equal (~13.5 instructions versus 7 instructions plus 7 literals).
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
 inline Condition ARM64Condition(IfCondition cond) {
   switch (cond) {
@@ -431,15 +434,6 @@
 
     __ Bind(GetEntryLabel());
 
-    if (instruction_->IsCheckCast()) {
-      // The codegen for the instruction overwrites `temp`, so put it back in place.
-      Register obj = InputRegisterAt(instruction_, 0);
-      Register temp = WRegisterFrom(locations->GetTemp(0));
-      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-      __ Ldr(temp, HeapOperand(obj, class_offset));
-      arm64_codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
-    }
-
     if (!is_fatal_) {
       SaveLiveRegisters(codegen, locations);
     }
@@ -454,11 +448,11 @@
     if (instruction_->IsInstanceOf()) {
       arm64_codegen->InvokeRuntime(
           QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this);
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t,
+                           const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
-      CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t,
-                           const mirror::Class*, const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this);
@@ -494,6 +488,7 @@
     uint32_t dex_pc = deoptimize->GetDexPc();
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
@@ -551,7 +546,7 @@
 
 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
   uint32_t num_entries = switch_instr_->GetNumEntries();
-  DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold);
+  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
 
   // We are about to use the assembler to place literals directly. Make sure we have enough
   // underlying code buffer and we have generated the jump table with right size.
@@ -571,6 +566,271 @@
   }
 }
 
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
+                                           Location out,
+                                           Location ref,
+                                           Location obj,
+                                           uint32_t offset,
+                                           Location index)
+      : instruction_(instruction),
+        out_(out),
+        ref_(ref),
+        obj_(obj),
+        offset_(offset),
+        index_(index) {
+    DCHECK(kEmitCompilerReadBarrier);
+    // If `obj` is equal to `out` or `ref`, it means the initial object
+    // has been overwritten by (or after) the heap object reference load
+    // to be instrumented, e.g.:
+    //
+    //   __ Ldr(out, HeapOperand(out, class_offset);
+    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //
+    // In that case, we have lost the information about the original
+    // object, and the emitted read barrier cannot work properly.
+    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    Primitive::Type type = Primitive::kPrimNot;
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+    DCHECK(!instruction_->IsInvoke() ||
+           (instruction_->IsInvokeStaticOrDirect() &&
+            instruction_->GetLocations()->Intrinsified()));
+
+    __ Bind(GetEntryLabel());
+
+    // Note: In the case of an HArrayGet instruction whose base
+    // address is an HArm64IntermediateAddress instruction, the base
+    // does not point to the array object itself, but to an offset
+    // within it. However, the read barrier entry point needs the
+    // array object address to be passed as its first argument. So we
+    // temporarily set `obj_` back to that address, and restore its
+    // initial value later.
+    if (instruction_->IsArrayGet() &&
+        instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
+      if (kIsDebugBuild) {
+        HArm64IntermediateAddress* intermediate_address =
+            instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
+        uint32_t intermediate_address_offset =
+            intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
+        DCHECK_EQ(intermediate_address_offset, offset_);
+        DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
+      }
+      Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
+      __ Sub(obj_reg, obj_reg, offset_);
+    }
+
+    SaveLiveRegisters(codegen, locations);
+
+    // We may have to change the index's value, but as `index_` is a
+    // constant member (like other "inputs" of this slow path),
+    // introduce a copy of it, `index`.
+    Location index = index_;
+    if (index_.IsValid()) {
+      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      if (instruction_->IsArrayGet()) {
+        // Compute the actual memory offset and store it in `index`.
+        Register index_reg = RegisterFrom(index_, Primitive::kPrimInt);
+        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
+        if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
+          // We are about to change the value of `index_reg` (see the
+          // calls to vixl::MacroAssembler::Lsl and
+          // vixl::MacroAssembler::Mov below), but it has
+          // not been saved by the previous call to
+          // art::SlowPathCode::SaveLiveRegisters, as it is a
+          // callee-save register --
+          // art::SlowPathCode::SaveLiveRegisters does not consider
+          // callee-save registers, as it has been designed with the
+          // assumption that callee-save registers are supposed to be
+          // handled by the called function.  So, as a callee-save
+          // register, `index_reg` _would_ eventually be saved onto
+          // the stack, but it would be too late: we would have
+          // changed its value earlier.  Therefore, we manually save
+          // it here into another freely available register,
+          // `free_reg`, chosen of course among the caller-save
+          // registers (as a callee-save `free_reg` register would
+          // exhibit the same problem).
+          //
+          // Note we could have requested a temporary register from
+          // the register allocator instead; but we prefer not to, as
+          // this is a slow path, and we know we can find a
+          // caller-save register that is available.
+          Register free_reg = FindAvailableCallerSaveRegister(codegen);
+          __ Mov(free_reg.W(), index_reg);
+          index_reg = free_reg;
+          index = LocationFrom(index_reg);
+        } else {
+          // The initial register stored in `index_` has already been
+          // saved in the call to art::SlowPathCode::SaveLiveRegisters
+          // (as it is not a callee-save register), so we can freely
+          // use it.
+        }
+        // Shifting the index value contained in `index_reg` by the scale
+        // factor (2) cannot overflow in practice, as the runtime is
+        // unable to allocate object arrays with a size larger than
+        // 2^26 - 1 (that is, 2^28 - 4 bytes).
+        __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type));
+        static_assert(
+            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+        __ Add(index_reg, index_reg, Operand(offset_));
+      } else {
+        DCHECK(instruction_->IsInvoke());
+        DCHECK(instruction_->GetLocations()->Intrinsified());
+        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+            << instruction_->AsInvoke()->GetIntrinsic();
+        DCHECK_EQ(offset_, 0U);
+        DCHECK(index_.IsRegisterPair());
+        // UnsafeGet's offset location is a register pair; the low
+        // part contains the correct offset.
+        index = index_.ToLow();
+      }
+    }
+
+    // We're moving two or three locations to locations that could
+    // overlap, so we need a parallel move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          LocationFrom(calling_convention.GetRegisterAt(0)),
+                          type,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          LocationFrom(calling_convention.GetRegisterAt(1)),
+                          type,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            LocationFrom(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
+    }
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+    RestoreLiveRegisters(codegen, locations);
+
+    // Restore the value of `obj_` when it corresponds to a
+    // HArm64IntermediateAddress instruction.
+    if (instruction_->IsArrayGet() &&
+        instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
+      if (kIsDebugBuild) {
+        HArm64IntermediateAddress* intermediate_address =
+            instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
+        uint32_t intermediate_address_offset =
+            intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
+        DCHECK_EQ(intermediate_address_offset, offset_);
+        DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
+      }
+      Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
+      __ Add(obj_reg, obj_reg, offset_);
+    }
+
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
+
+ private:
+  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<size_t>(XRegisterFrom(ref_).code());
+    size_t obj = static_cast<size_t>(XRegisterFrom(obj_).code());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+        return Register(VIXLRegCodeFromART(i), kXRegSize);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on ARM64
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free register";
+    UNREACHABLE();
+  }
+
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
+      : instruction_(instruction), out_(out), root_(root) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Primitive::Type type = Primitive::kPrimNot;
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    // The argument of the ReadBarrierForRootSlow entry point is not
+    // a managed reference (`mirror::Object*`), but a
+    // `GcRoot<mirror::Object>*`; thus we need a 64-bit move here,
+    // and we cannot use
+    //
+    //   arm64_codegen->MoveLocation(
+    //       LocationFrom(calling_convention.GetRegisterAt(0)),
+    //       root_,
+    //       type);
+    //
+    // which would emit a 32-bit move, as `type` is a (32-bit wide)
+    // reference type (`Primitive::kPrimNot`).
+    __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location root_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
+};
+
 #undef __
 
 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
@@ -1401,13 +1661,25 @@
 }
 
 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) {
+  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_field_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister());
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps for an object field get when read barriers
+    // are enabled: we do not want the load to overwrite the object's
+    // location, as we need it to emit the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
 }
 
@@ -1436,7 +1708,11 @@
   }
 
   if (field_type == Primitive::kPrimNot) {
-    GetAssembler()->MaybeUnpoisonHeapReference(OutputCPURegister(instruction).W());
+    LocationSummary* locations = instruction->GetLocations();
+    Location base = locations->InAt(0);
+    Location out = locations->Out();
+    uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base, offset);
   }
 }
 
@@ -1515,6 +1791,17 @@
         __ Orr(dst, lhs, rhs);
       } else if (instr->IsSub()) {
         __ Sub(dst, lhs, rhs);
+      } else if (instr->IsRor()) {
+        if (rhs.IsImmediate()) {
+          uint32_t shift = rhs.immediate() & (lhs.SizeInBits() - 1);
+          __ Ror(dst, lhs, shift);
+        } else {
+          // Ensure the shift distance is in a register of the same size as
+          // the result. If we are rotating a long and the shift amount
+          // originally arrives in a W register, no sxtw to X is needed,
+          // since all shift distances are masked with `reg_bits - 1`.
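+          // (The register form of ROR only consumes the low bits of the
+          // shift register, i.e. the amount is taken modulo the register
+          // size, so no explicit extension is needed.)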
+          __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
+        }
       } else {
         DCHECK(instr->IsXor());
         __ Eor(dst, lhs, rhs);
@@ -1613,6 +1900,82 @@
   HandleBinaryOp(instruction);
 }
 
+void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp(
+    HArm64DataProcWithShifterOp* instruction) {
+  DCHECK(instruction->GetType() == Primitive::kPrimInt ||
+         instruction->GetType() == Primitive::kPrimLong);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  if (instruction->GetInstrKind() == HInstruction::kNeg) {
+    locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp(
+    HArm64DataProcWithShifterOp* instruction) {
+  Primitive::Type type = instruction->GetType();
+  HInstruction::InstructionKind kind = instruction->GetInstrKind();
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+  Register out = OutputRegister(instruction);
+  Register left;
+  if (kind != HInstruction::kNeg) {
+    left = InputRegisterAt(instruction, 0);
+  }
+  // If this `HArm64DataProcWithShifterOp` was created by merging a type
+  // conversion into the shifter operand, the IR that produced `right_reg`
+  // (the input to that type conversion) can have a different type from the
+  // current instruction's type, so we indicate the type manually.
+  Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
+  int64_t shift_amount = (type == Primitive::kPrimInt)
+    ? static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxIntShiftValue)
+    : static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxLongShiftValue);
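+  // (`kMaxIntShiftValue` and `kMaxLongShiftValue` mask the amount to the
+  // valid shift range for the operand width: 0-31 for ints, 0-63 for longs.)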
+
+  Operand right_operand(0);
+
+  HArm64DataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
+  if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind)) {
+    right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
+  } else {
+    right_operand = Operand(right_reg, helpers::ShiftFromOpKind(op_kind), shift_amount);
+  }
+
+  // Logical binary operations do not support extension operations in the
+  // operand. Note that VIXL would still manage if it was passed by generating
+  // the extension as a separate instruction.
+  // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
+  DCHECK(!right_operand.IsExtendedRegister() ||
+         (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
+          kind != HInstruction::kNeg));
+  switch (kind) {
+    case HInstruction::kAdd:
+      __ Add(out, left, right_operand);
+      break;
+    case HInstruction::kAnd:
+      __ And(out, left, right_operand);
+      break;
+    case HInstruction::kNeg:
+      DCHECK(instruction->InputAt(0)->AsConstant()->IsZero());
+      __ Neg(out, right_operand);
+      break;
+    case HInstruction::kOr:
+      __ Orr(out, left, right_operand);
+      break;
+    case HInstruction::kSub:
+      __ Sub(out, left, right_operand);
+      break;
+    case HInstruction::kXor:
+      __ Eor(out, left, right_operand);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected operation kind: " << kind;
+      UNREACHABLE();
+  }
+}
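+// For example, an HAdd whose right-hand side is an HShl by a constant may
+// have been merged by the instruction simplifier into a single
+// HArm64DataProcWithShifterOp, so that the code above emits one combined
+// instruction such as "add w0, w1, w2, lsl #5".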
+
 void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -1628,23 +1991,75 @@
          Operand(InputOperandAt(instruction, 1)));
 }
 
-void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
+void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
+  locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex,
+                     Location::RequiresRegister());
+  locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
+  locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
+  Register res = OutputRegister(instr);
+  Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex);
+  Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex);
+  Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex);
+
+  // Avoid emitting code that could trigger Cortex A53's erratum 835769.
+  // This fixup should be carried out for all multiply-accumulate instructions:
+  // madd, msub, smaddl, smsubl, umaddl and umsubl.
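+  // (Erratum 835769: a 64-bit multiply-accumulate that directly follows a
+  // load or store may produce an incorrect result; the nop emitted below
+  // breaks up such a sequence.)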
+  if (instr->GetType() == Primitive::kPrimLong &&
+      codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
+    MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
+    vixl::Instruction* prev = masm->GetCursorAddress<vixl::Instruction*>() - vixl::kInstructionSize;
+    if (prev->IsLoadOrStore()) {
+      // Make sure we emit exactly one nop.
+      vixl::CodeBufferCheckScope scope(masm,
+                                       vixl::kInstructionSize,
+                                       vixl::CodeBufferCheckScope::kCheck,
+                                       vixl::CodeBufferCheckScope::kExactSize);
+      __ nop();
+    }
+  }
+
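+  // Madd computes "accumulator + mul_left * mul_right"; Msub computes
+  // "accumulator - mul_left * mul_right".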
+  if (instr->GetOpKind() == HInstruction::kAdd) {
+    __ Madd(res, mul_left, mul_right, accumulator);
+  } else {
+    DCHECK(instr->GetOpKind() == HInstruction::kSub);
+    __ Msub(res, mul_left, mul_right, accumulator);
+  }
+}
+
+void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
+  bool object_array_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_array_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps in the case of an object array get with
+    // read barriers enabled: we do not want the move to overwrite the
+    // array's location, as we need it to emit the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
   Primitive::Type type = instruction->GetType();
   Register obj = InputRegisterAt(instruction, 0);
-  Location index = instruction->GetLocations()->InAt(1);
-  size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
+  LocationSummary* locations = instruction->GetLocations();
+  Location index = locations->InAt(1);
+  uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
   MemOperand source = HeapOperand(obj);
   CPURegister dest = OutputCPURegister(instruction);
 
@@ -1676,8 +2091,22 @@
   codegen_->Load(type, dest, source);
   codegen_->MaybeRecordImplicitNullCheck(instruction);
 
-  if (instruction->GetType() == Primitive::kPrimNot) {
-    GetAssembler()->MaybeUnpoisonHeapReference(dest.W());
+  if (type == Primitive::kPrimNot) {
+    static_assert(
+        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    Location obj_loc = locations->InAt(0);
+    Location out = locations->Out();
+    if (index.IsConstant()) {
+      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
+    } else {
+      // Note: when `obj_loc` is a HArm64IntermediateAddress, it does
+      // not contain the base address of the array object, which is
+      // needed by the read barrier entry point. So the read barrier
+      // slow path will temporarily set back `obj_loc` to the right
+      // address (see ReadBarrierForHeapReferenceSlowPathARM64::EmitNativeCode).
+      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index);
+    }
   }
 }
 
@@ -1695,12 +2124,19 @@
 }
 
 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
+  Primitive::Type value_type = instruction->GetComponentType();
+
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool object_array_set_with_read_barrier =
+      kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      instruction->NeedsTypeCheck() ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+      (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-  if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+  if (Primitive::IsFloatingPointType(value_type)) {
     locations->SetInAt(2, Location::RequiresFpuRegister());
   } else {
     locations->SetInAt(2, Location::RequiresRegister());
@@ -1710,7 +2146,7 @@
 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
   Primitive::Type value_type = instruction->GetComponentType();
   LocationSummary* locations = instruction->GetLocations();
-  bool may_need_runtime_call = locations->CanCall();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
@@ -1724,7 +2160,7 @@
   BlockPoolsScope block_pools(masm);
 
   if (!needs_write_barrier) {
-    DCHECK(!may_need_runtime_call);
+    DCHECK(!may_need_runtime_call_for_type_check);
     if (index.IsConstant()) {
       offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
       destination = HeapOperand(array, offset);
@@ -1774,7 +2210,7 @@
       uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
       uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
 
-      if (may_need_runtime_call) {
+      if (may_need_runtime_call_for_type_check) {
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction);
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
@@ -1789,26 +2225,66 @@
           __ Bind(&non_zero);
         }
 
-        Register temp2 = temps.AcquireSameSizeAs(array);
-        __ Ldr(temp, HeapOperand(array, class_offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        GetAssembler()->MaybeUnpoisonHeapReference(temp);
-        __ Ldr(temp, HeapOperand(temp, component_offset));
-        __ Ldr(temp2, HeapOperand(Register(value), class_offset));
-        // No need to poison/unpoison, we're comparing two poisoned references.
-        __ Cmp(temp, temp2);
-        if (instruction->StaticTypeOfArrayIsObjectArray()) {
-          vixl::Label do_put;
-          __ B(eq, &do_put);
-          GetAssembler()->MaybeUnpoisonHeapReference(temp);
-          __ Ldr(temp, HeapOperand(temp, super_offset));
-          // No need to unpoison, we're comparing against null.
-          __ Cbnz(temp, slow_path->GetEntryLabel());
-          __ Bind(&do_put);
+        if (kEmitCompilerReadBarrier) {
+          // When read barriers are enabled, the type checking
+          // instrumentation requires two read barriers:
+          //
+          //   __ Mov(temp2, temp);
+          //   // /* HeapReference<Class> */ temp = temp->component_type_
+          //   __ Ldr(temp, HeapOperand(temp, component_offset));
+          //   codegen_->GenerateReadBarrier(
+          //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+          //
+          //   // /* HeapReference<Class> */ temp2 = value->klass_
+          //   __ Ldr(temp2, HeapOperand(Register(value), class_offset));
+          //   codegen_->GenerateReadBarrier(
+          //       instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc);
+          //
+          //   __ Cmp(temp, temp2);
+          //
+          // However, the second read barrier may trash `temp`, as it
+          // is a temporary register, and as such would not be saved
+          // along with live registers before calling the runtime (nor
+          // restored afterwards).  So in this case, we bail out and
+          // delegate the work to the array set slow path.
+          //
+          // TODO: Extend the register allocator to support a new
+          // "(locally) live temp" location so as to avoid always
+          // going into the slow path when read barriers are enabled.
+          __ B(slow_path->GetEntryLabel());
         } else {
-          __ B(ne, slow_path->GetEntryLabel());
+          Register temp2 = temps.AcquireSameSizeAs(array);
+          // /* HeapReference<Class> */ temp = array->klass_
+          __ Ldr(temp, HeapOperand(array, class_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          GetAssembler()->MaybeUnpoisonHeapReference(temp);
+
+          // /* HeapReference<Class> */ temp = temp->component_type_
+          __ Ldr(temp, HeapOperand(temp, component_offset));
+          // /* HeapReference<Class> */ temp2 = value->klass_
+          __ Ldr(temp2, HeapOperand(Register(value), class_offset));
+          // If heap poisoning is enabled, no need to unpoison `temp`
+          // nor `temp2`, as we are comparing two poisoned references.
+          __ Cmp(temp, temp2);
+
+          if (instruction->StaticTypeOfArrayIsObjectArray()) {
+            vixl::Label do_put;
+            __ B(eq, &do_put);
+            // If heap poisoning is enabled, the `temp` reference has
+            // not been unpoisoned yet; unpoison it now.
+            GetAssembler()->MaybeUnpoisonHeapReference(temp);
+
+            // /* HeapReference<Class> */ temp = temp->super_class_
+            __ Ldr(temp, HeapOperand(temp, super_offset));
+            // If heap poisoning is enabled, no need to unpoison
+            // `temp`, as we are comparing against null below.
+            __ Cbnz(temp, slow_path->GetEntryLabel());
+            __ Bind(&do_put);
+          } else {
+            __ B(ne, slow_path->GetEntryLabel());
+          }
+          temps.Release(temp2);
         }
-        temps.Release(temp2);
       }
 
       if (kPoisonHeapReferences) {
@@ -1824,7 +2300,7 @@
       }
       __ Str(source, destination);
 
-      if (!may_need_runtime_call) {
+      if (!may_need_runtime_call_for_type_check) {
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
     }
@@ -1951,7 +2427,7 @@
   }
 }
 
-void LocationsBuilderARM64::VisitCondition(HCondition* instruction) {
+void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
 
   if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
@@ -1971,7 +2447,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::VisitCondition(HCondition* instruction) {
+void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
   if (!instruction->NeedsMaterialization()) {
     return;
   }
@@ -2019,8 +2495,8 @@
   M(Above)                                                                               \
   M(AboveOrEqual)
 #define DEFINE_CONDITION_VISITORS(Name)                                                  \
-void LocationsBuilderARM64::Visit##Name(H##Name* comp) { VisitCondition(comp); }         \
-void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { VisitCondition(comp); }
+void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }         \
+void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
 #undef DEFINE_CONDITION_VISITORS
 #undef FOR_EACH_CONDITION_INSTRUCTION
@@ -2473,6 +2949,14 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   HandleFieldGet(instruction);
 }
@@ -2491,40 +2975,44 @@
 
 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
-    // The out register is used as a temporary, so it overlaps with the inputs.
-    // Note that TypeCheckSlowPathARM64 uses this register too.
-    locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
-    locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // The "out" register is used as a temporary, so it overlaps with the inputs.
+  // Note that TypeCheckSlowPathARM64 uses this register too.
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+  // When read barriers are enabled, we need a temporary register for
+  // some cases.
+  if (kEmitCompilerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary* locations = instruction->GetLocations();
+  Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
   Register cls = InputRegisterAt(instruction, 1);
+  Location out_loc = locations->Out();
   Register out = OutputRegister(instruction);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
@@ -2540,15 +3028,9 @@
     __ Cbz(obj, &zero);
   }
 
-  // In case of an interface/unresolved check, we put the object class into the object register.
-  // This is safe, as the register is caller-save, and the object must be in another
-  // register if it survives the runtime call.
-  Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) ||
-      (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck)
-      ? obj
-      : out;
-  __ Ldr(target, HeapOperand(obj.W(), class_offset));
-  GetAssembler()->MaybeUnpoisonHeapReference(target);
+  // /* HeapReference<Class> */ out = obj->klass_
+  __ Ldr(out, HeapOperand(obj.W(), class_offset));
+  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
 
   switch (instruction->GetTypeCheckKind()) {
     case TypeCheckKind::kExactCheck: {
@@ -2559,13 +3041,23 @@
       }
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       vixl::Label loop, success;
       __ Bind(&loop);
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = WRegisterFrom(temp_loc);
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->super_class_
       __ Ldr(out, HeapOperand(out, super_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Cmp(out, cls);
@@ -2576,14 +3068,24 @@
       }
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       vixl::Label loop, success;
       __ Bind(&loop);
       __ Cmp(out, cls);
       __ B(eq, &success);
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = WRegisterFrom(temp_loc);
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->super_class_
       __ Ldr(out, HeapOperand(out, super_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
       __ Cbnz(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ B(&done);
@@ -2594,14 +3096,24 @@
       }
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
       vixl::Label exact_check;
       __ Cmp(out, cls);
       __ B(eq, &exact_check);
-      // Otherwise, we need to check that the object's class is a non primitive array.
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = WRegisterFrom(temp_loc);
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->component_type_
       __ Ldr(out, HeapOperand(out, component_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Ldrh(out, HeapOperand(out, primitive_offset));
@@ -2612,11 +3124,12 @@
       __ B(&done);
       break;
     }
+
     case TypeCheckKind::kArrayCheck: {
       __ Cmp(out, cls);
       DCHECK(locations->OnlyCallsOnSlowPath());
-      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
-          instruction, /* is_fatal */ false);
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
+                                                                      /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
       __ B(ne, slow_path->GetEntryLabel());
       __ Mov(out, 1);
@@ -2625,13 +3138,25 @@
       }
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-    default: {
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that we indeed only call on a slow path, but we always
+      // go into it for the unresolved and interface check cases.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as it would
+      // require assigning fixed registers for the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), which might be cluttered by the potential first
+      // read barrier emission at the beginning of this method.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
+                                                                      /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ B(slow_path->GetEntryLabel());
       if (zero.IsLinked()) {
         __ B(&done);
       }
@@ -2657,58 +3182,62 @@
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
 
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = throws_into_catch
-          ? LocationSummary::kCallOnSlowPath
-          : LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
 
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
-    // Note that TypeCheckSlowPathARM64 uses this register too.
-    locations->AddTemp(Location::RequiresRegister());
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // Note that TypeCheckSlowPathARM64 uses this "temp" register too.
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  // When read barriers are enabled, we need an additional temporary
+  // register for some cases.
+  if (kEmitCompilerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary* locations = instruction->GetLocations();
+  Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
   Register cls = InputRegisterAt(instruction, 1);
-  Register temp;
-  if (!locations->WillCall()) {
-    temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0));
-  }
-
+  Location temp_loc = locations->GetTemp(0);
+  Register temp = WRegisterFrom(temp_loc);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
-  SlowPathCodeARM64* slow_path = nullptr;
 
-  if (!locations->WillCall()) {
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
-        instruction, !locations->CanCall());
-    codegen_->AddSlowPath(slow_path);
-  }
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  bool is_type_check_slow_path_fatal =
+      (type_check_kind == TypeCheckKind::kExactCheck ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+      !instruction->CanThrowIntoCatchBlock();
+  SlowPathCodeARM64* type_check_slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
+                                                          is_type_check_slow_path_fatal);
+  codegen_->AddSlowPath(type_check_slow_path);
 
   vixl::Label done;
   // Avoid null check if we know obj is not null.
@@ -2716,76 +3245,159 @@
     __ Cbz(obj, &done);
   }
 
-  if (locations->WillCall()) {
-    __ Ldr(obj, HeapOperand(obj, class_offset));
-    GetAssembler()->MaybeUnpoisonHeapReference(obj);
-  } else {
-    __ Ldr(temp, HeapOperand(obj, class_offset));
-    GetAssembler()->MaybeUnpoisonHeapReference(temp);
-  }
+  // /* HeapReference<Class> */ temp = obj->klass_
+  __ Ldr(temp, HeapOperand(obj, class_offset));
+  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
       __ Cmp(temp, cls);
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
-      __ B(ne, slow_path->GetEntryLabel());
+      __ B(ne, type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      vixl::Label loop;
+      vixl::Label loop, compare_classes;
       __ Bind(&loop);
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = WRegisterFrom(temp2_loc);
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->super_class_
       __ Ldr(temp, HeapOperand(temp, super_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(temp);
-      // Jump to the slow path to throw the exception.
-      __ Cbz(temp, slow_path->GetEntryLabel());
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+      // If the class reference currently in `temp` is not null, jump
+      // to the `compare_classes` label to compare it with the checked
+      // class.
+      __ Cbnz(temp, &compare_classes);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ Ldr(temp, HeapOperand(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ B(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&compare_classes);
       __ Cmp(temp, cls);
       __ B(ne, &loop);
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       vixl::Label loop;
       __ Bind(&loop);
       __ Cmp(temp, cls);
       __ B(eq, &done);
+
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = WRegisterFrom(temp2_loc);
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->super_class_
       __ Ldr(temp, HeapOperand(temp, super_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(temp);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+      // If the class reference currently in `temp` is not null, jump
+      // back to the beginning of the loop.
       __ Cbnz(temp, &loop);
-      // Jump to the slow path to throw the exception.
-      __ B(slow_path->GetEntryLabel());
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ Ldr(temp, HeapOperand(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ B(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
+      vixl::Label check_non_primitive_component_type;
       __ Cmp(temp, cls);
       __ B(eq, &done);
-      // Otherwise, we need to check that the object's class is a non primitive array.
+
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = WRegisterFrom(temp2_loc);
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->component_type_
       __ Ldr(temp, HeapOperand(temp, component_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(temp);
-      __ Cbz(temp, slow_path->GetEntryLabel());
+      codegen_->MaybeGenerateReadBarrier(
+          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+
+      // If the component type is not null (i.e. the object is indeed
+      // an array), jump to label `check_non_primitive_component_type`
+      // to further check that this component type is not a primitive
+      // type.
+      __ Cbnz(temp, &check_non_primitive_component_type);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ Ldr(temp, HeapOperand(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ B(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&check_non_primitive_component_type);
       __ Ldrh(temp, HeapOperand(temp, primitive_offset));
       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ Cbnz(temp, slow_path->GetEntryLabel());
+      __ Cbz(temp, &done);
+      // Same comment as above regarding `temp` and the slow path.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ Ldr(temp, HeapOperand(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ B(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-    default:
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
+      //
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require
+      // assigning fixed registers for the inputs of this HCheckCast
+      // instruction (following the runtime calling convention), which
+      // might be cluttered by the potential first read barrier
+      // emission at the beginning of this method.
+      __ B(type_check_slow_path->GetEntryLabel());
       break;
   }
   __ Bind(&done);
 
-  if (slow_path != nullptr) {
-    __ Bind(slow_path->GetExitLabel());
-  }
+  __ Bind(type_check_slow_path->GetExitLabel());
 }
 
 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
@@ -2828,10 +3440,11 @@
 
 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
-  Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0));
+  LocationSummary* locations = invoke->GetLocations();
+  Register temp = XRegisterFrom(locations->GetTemp(0));
   uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
       invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
-  Location receiver = invoke->GetLocations()->InAt(0);
+  Location receiver = locations->InAt(0);
   Offset class_offset = mirror::Object::ClassOffset();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
 
@@ -2843,14 +3456,22 @@
   scratch_scope.Exclude(ip1);
   __ Mov(ip1, invoke->GetDexMethodIndex());
 
-  // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
     __ Ldr(temp.W(), StackOperandFrom(receiver));
+    // /* HeapReference<Class> */ temp = temp->klass_
     __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
   } else {
+    // /* HeapReference<Class> */ temp = receiver->klass_
     __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (though
+  // the concurrent copying collector may not do so in the future).
   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
   // temp = temp->GetImtEntryAt(method_offset);
   __ Ldr(temp, MemOperand(temp, method_offset));
@@ -2926,7 +3547,7 @@
       __ Ldr(XRegisterFrom(temp), MemOperand(tr, invoke->GetStringInitOffset()));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       // Load method address from literal pool.
@@ -2960,7 +3581,7 @@
       break;
     }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register reg = XRegisterFrom(temp);
       Register method_reg;
       if (current_method.IsRegister()) {
@@ -2972,7 +3593,7 @@
         __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset));
       }
 
-      // temp = current_method->dex_cache_resolved_methods_;
+      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
       __ Ldr(reg.X(),
              MemOperand(method_reg.X(),
                         ArtMethod::DexCacheResolvedMethodsOffset(kArm64WordSize).Int32Value()));
@@ -3016,8 +3637,12 @@
 }
 
 void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   Register temp = XRegisterFrom(temp_in);
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
@@ -3027,8 +3652,15 @@
   BlockPoolsScope block_pools(GetVIXLAssembler());
 
   DCHECK(receiver.IsRegister());
-  __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
+  // /* HeapReference<Class> */ temp = receiver->klass_
+  __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (though
+  // the concurrent copying collector may not do so in the future).
   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
   // temp = temp->GetMethodAt(method_offset);
   __ Ldr(temp, MemOperand(temp, method_offset));
@@ -3141,7 +3773,8 @@
   CodeGenerator::CreateLoadClassLocationSummary(
       cls,
       LocationFrom(calling_convention.GetRegisterAt(0)),
-      LocationFrom(vixl::x0));
+      LocationFrom(vixl::x0),
+      /* code_generator_supports_read_barrier */ true);
 }
 
 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
@@ -3151,30 +3784,56 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
+  Location out_loc = cls->GetLocations()->Out();
   Register out = OutputRegister(cls);
   Register current_method = InputRegisterAt(cls, 0);
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  } else {
-    DCHECK(cls->CanCallRuntime());
-    MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
-    __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
-    __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
-    // TODO: We will need a read barrier here.
-
-    SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
-        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ Cbz(out, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
+    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+    if (kEmitCompilerReadBarrier) {
+      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+      __ Add(out.X(), current_method.X(), declaring_class_offset);
+      // /* mirror::Class* */ out = out->Read()
+      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
     } else {
-      __ Bind(slow_path->GetExitLabel());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      __ Ldr(out, MemOperand(current_method, declaring_class_offset));
+    }
+  } else {
+    MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
+    // /* GcRoot<mirror::Class>[] */ out =
+    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+    __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
+
+    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+    if (kEmitCompilerReadBarrier) {
+      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
+      __ Add(out.X(), out.X(), cache_offset);
+      // /* mirror::Class* */ out = out->Read()
+      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
+    } else {
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      __ Ldr(out, MemOperand(out.X(), cache_offset));
+    }
+
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
+          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+      if (!cls->IsInDexCache()) {
+        __ Cbz(out, slow_path->GetEntryLabel());
+      }
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
@@ -3210,24 +3869,50 @@
 }
 
 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
-  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
-  codegen_->AddSlowPath(slow_path);
-
+  Location out_loc = load->GetLocations()->Out();
   Register out = OutputRegister(load);
   Register current_method = InputRegisterAt(load, 0);
-  __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset()));
-  __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex())));
-  // TODO: We will need a read barrier here.
-  __ Cbz(out, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
+
+  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+  if (kEmitCompilerReadBarrier) {
+    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+    __ Add(out.X(), current_method.X(), declaring_class_offset);
+    // /* mirror::Class* */ out = out->Read()
+    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+  } else {
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    __ Ldr(out, MemOperand(current_method, declaring_class_offset));
+  }
+
+  // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
+  __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
+
+  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
+  if (kEmitCompilerReadBarrier) {
+    // /* GcRoot<mirror::String>* */ out = &out[string_index]
+    __ Add(out.X(), out.X(), cache_offset);
+    // /* mirror::String* */ out = out->Read()
+    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+  } else {
+    // /* GcRoot<mirror::String> */ out = out[string_index]
+    __ Ldr(out, MemOperand(out.X(), cache_offset));
+  }
+
+  if (!load->IsInDexCache()) {
+    SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
+    codegen_->AddSlowPath(slow_path);
+    __ Cbz(out, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
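
Both VisitLoadClass and VisitLoadString above follow the same GC-root pattern: with read barriers enabled, the root's address is materialized and handed to the slow path; otherwise a plain load suffices. A minimal C++ sketch of that semantics, with simplified stand-ins for ART's GcRoot and read-barrier machinery (the flag value and MarkRoot are assumptions, not ART's API):

    template <typename T>
    struct GcRoot { T* reference_; };  // real ART stores a compressed reference

    constexpr bool kEmitCompilerReadBarrier = false;  // assumed build-time flag

    template <typename T>
    T* MarkRoot(GcRoot<T>* root) {
      // Placeholder for artReadBarrierForRootSlow: a moving collector may
      // return the to-space copy of the referenced object here.
      return root->reference_;
    }

    template <typename T>
    T* ReadGcRoot(GcRoot<T>* root) {
      if (kEmitCompilerReadBarrier) {
        // Read-barrier path: pass the root's *address* to the slow path,
        // matching the Add(out, current_method, offset) sequences above.
        return MarkRoot(root);
      }
      // Fast path: a plain load, matching the Ldr in the else branches.
      return root->reference_;
    }
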
 
 void LocationsBuilderARM64::VisitLocal(HLocal* local) {
@@ -3260,7 +3945,11 @@
       instruction,
       instruction->GetDexPc(),
       nullptr);
-  CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderARM64::VisitMul(HMul* mul) {
@@ -3349,8 +4038,6 @@
   locations->SetOut(LocationFrom(x0));
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2)));
-  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck,
-                       void*, uint32_t, int32_t, ArtMethod*>();
 }
 
 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
@@ -3372,17 +4059,12 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
 }
 
 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt);
-  DCHECK(type_index.Is(w0));
-  __ Mov(type_index, instruction->GetTypeIndex());
   // Note: if heap poisoning is enabled, the entry point takes care
REPLACEME_PLACEHOLDER
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
@@ -3559,11 +4241,17 @@
       int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
                                                              : QUICK_ENTRY_POINT(pFmod);
       codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc(), nullptr);
+      if (type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickFmodf, float, float, float>();
+      } else {
+        CheckEntrypointTypes<kQuickFmod, double, double, double>();
+      }
       break;
     }
 
     default:
       LOG(FATAL) << "Unexpected rem type " << type;
+      UNREACHABLE();
   }
 }
 
@@ -3593,6 +4281,14 @@
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderARM64::VisitRor(HRor* ror) {
+  HandleBinaryOp(ror);
+}
+
+void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
+  HandleBinaryOp(ror);
+}
+
 void LocationsBuilderARM64::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
@@ -3630,6 +4326,7 @@
 
     default:
       LOG(FATAL) << "Unimplemented local type " << field_type;
+      UNREACHABLE();
   }
 }
 
@@ -3803,9 +4500,7 @@
     int min_size = std::min(result_size, input_size);
     Register output = OutputRegister(conversion);
     Register source = InputRegisterAt(conversion, 0);
-    if ((result_type == Primitive::kPrimChar) && (input_size < result_size)) {
-      __ Ubfx(output, source, 0, result_size * kBitsPerByte);
-    } else if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
+    if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
       // 'int' values are used directly as W registers, discarding the top
       // bits, so we don't need to sign-extend and can just perform a move.
       // We do not pass the `kDiscardForSameWReg` argument to force clearing the
@@ -3814,9 +4509,11 @@
       // 32bit input value as a 64bit value assuming that the top 32 bits are
       // zero.
       __ Mov(output.W(), source.W());
-    } else if ((result_type == Primitive::kPrimChar) ||
-               ((input_type == Primitive::kPrimChar) && (result_size > input_size))) {
-      __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
+    } else if (result_type == Primitive::kPrimChar ||
+               (input_type == Primitive::kPrimChar && input_size < result_size)) {
+      __ Ubfx(output,
+              output.IsX() ? source.X() : source.W(),
+              0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte);
     } else {
       __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
     }
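
The conversion logic above picks Ubfx for char (the only unsigned primitive type, so it zero-extends) and Sbfx for every other narrowing or widening integral case. A hedged C++ model of the two bitfield extracts (a sketch of their semantics, not VIXL's API; assumes arithmetic right shift for signed values):

    #include <cstdint>

    uint64_t Ubfx(uint64_t src, int lsb, int width) {  // zero-extend
      uint64_t mask = (width == 64) ? ~0ULL : ((1ULL << width) - 1);
      return (src >> lsb) & mask;
    }

    int64_t Sbfx(uint64_t src, int lsb, int width) {   // sign-extend
      int64_t shifted = static_cast<int64_t>(src << (64 - lsb - width));
      return shifted >> (64 - width);                  // arithmetic shift
    }

    // Char conversions always extract 16 zero-extended bits, matching
    // Primitive::ComponentSize(kPrimChar) * kBitsPerByte in the code above.
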
@@ -3893,20 +4590,29 @@
   // ranges and emit the tables only as required.
   static constexpr int32_t kJumpTableInstructionThreshold = 1 * MB / kMaxExpectedSizePerHInstruction;
 
-  if (num_entries < kPackedSwitchJumpTableThreshold ||
+  if (num_entries <= kPackedSwitchCompareJumpThreshold ||
       // Current instruction id is an upper bound of the number of HIRs in the graph.
       GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
     // Create a series of compare/jumps.
+    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+    Register temp = temps.AcquireW();
+    __ Subs(temp, value_reg, Operand(lower_bound));
+
     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-    for (uint32_t i = 0; i < num_entries; i++) {
-      int32_t case_value = lower_bound + i;
-      vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
-      if (case_value == 0) {
-        __ Cbz(value_reg, succ);
-      } else {
-        __ Cmp(value_reg, Operand(case_value));
-        __ B(eq, succ);
-      }
+    // Jump to successors[0] if value == lower_bound.
+    __ B(eq, codegen_->GetLabelOf(successors[0]));
+    int32_t last_index = 0;
+    for (; num_entries - last_index > 2; last_index += 2) {
+      __ Subs(temp, temp, Operand(2));
+      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+      __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
+      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
+    }
+    if (num_entries - last_index == 2) {
+      // Handle the last remaining case value.
+      __ Cmp(temp, Operand(1));
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
     }
 
     // And the default for any other value.
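
The rewritten chain normalizes the switch value once and then consumes it two cases per iteration, reading the flags of each Subs for both a lo and an eq branch. A C++ model of the dispatch order the emitted code follows (sketch; returns the successor index, or -1 for the default block). Note the unsigned modelling: B.lo is an unsigned comparison, so out-of-range values below lower_bound wrap high and correctly fall through to the default:

    #include <cstdint>

    int PackedSwitchTarget(int32_t value, int32_t lower_bound, int num_entries) {
      uint32_t temp = static_cast<uint32_t>(value) -
                      static_cast<uint32_t>(lower_bound);  // Subs temp, value, #lower_bound
      if (temp == 0) return 0;                  // B.eq successors[0]
      int last_index = 0;
      for (; num_entries - last_index > 2; last_index += 2) {
        uint32_t before = temp;
        temp -= 2;                              // Subs temp, temp, #2
        if (before < 2) return last_index + 1;  // B.lo: borrow => the odd case
        if (temp == 0) return last_index + 2;   // B.eq
      }
      if (num_entries - last_index == 2 && temp == 1) {
        return last_index + 1;                  // Cmp temp, #1; B.eq
      }
      return -1;                                // fall through to the default
    }
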
@@ -3951,6 +4657,82 @@
   }
 }
 
+void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             Location ref,
+                                             Location obj,
+                                             uint32_t offset,
+                                             Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // If heap poisoning is enabled, the unpoisoning of the loaded
+  // reference will be carried out by the runtime within the slow
+  // path.
+  //
+  // Note that `ref` currently does not get unpoisoned (when heap
+  // poisoning is enabled), which is alright as the `ref` argument is
+  // not used by the artReadBarrierSlow entry point.
+  //
+  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
+      ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
+  AddSlowPath(slow_path);
+
+  // TODO: When read barrier has a fast path, add it here.
+  /* Currently the read barrier call is inserted after the original load.
+   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
+   * original load. This load-load ordering is required by the read barrier.
+   * The fast path/slow path (for Baker's algorithm) should look like:
+   *
+   * bool isGray = obj.LockWord & kReadBarrierMask;
+   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
+   * ref = obj.field;    // this is the original load
+   * if (isGray) {
+   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
+   * }
+   */
+
+  __ B(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
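
The TODO's Baker-style fast path, rendered as a hedged C++ sketch; the lock-word bit, object layout, and Mark() below are stand-ins rather than ART's real definitions:

    #include <atomic>

    struct Obj {
      std::atomic<uint32_t> lock_word;
      Obj* field;
    };
    constexpr uint32_t kReadBarrierMask = 1u << 28;  // assumed bit position

    Obj* Mark(Obj* ref) {
      return ref;  // placeholder: a real collector returns the to-space copy
    }

    Obj* BakerReadField(Obj* obj) {
      bool is_gray =
          (obj->lock_word.load(std::memory_order_relaxed) & kReadBarrierMask) != 0;
      // The "lfence" in the comment above: keep the field load ordered after
      // the lock-word load (load-load ordering).
      std::atomic_thread_fence(std::memory_order_acquire);
      Obj* ref = obj->field;  // the original load
      return is_gray ? Mark(ref) : ref;
    }
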
+
+void CodeGeneratorARM64::MaybeGenerateReadBarrier(HInstruction* instruction,
+                                                  Location out,
+                                                  Location ref,
+                                                  Location obj,
+                                                  uint32_t offset,
+                                                  Location index) {
+  if (kEmitCompilerReadBarrier) {
+    // If heap poisoning is enabled, unpoisoning will be taken care of
+    // by the runtime within the slow path.
+    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+  } else if (kPoisonHeapReferences) {
+    GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
+  }
+}
+
+void CodeGeneratorARM64::GenerateReadBarrierForRoot(HInstruction* instruction,
+                                                    Location out,
+                                                    Location root) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special for this here.
+  SlowPathCodeARM64* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
+  AddSlowPath(slow_path);
+
+  // TODO: Implement a fast path for ReadBarrierForRoot, performing
+  // the following operation (for Baker's algorithm):
+  //
+  //   if (thread.tls32_.is_gc_marking) {
+  //     root = Mark(root);
+  //   }
+
+  __ B(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
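
And the planned root fast path from the TODO above, again as a sketch with assumed names (is_gc_marking mirrors thread.tls32_.is_gc_marking; mark is a stand-in for the runtime call):

    struct Thread {
      bool is_gc_marking;  // mirrors Thread::tls32_.is_gc_marking (assumed layout)
    };

    template <typename T>
    T* ReadRoot(Thread* self, T** root, T* (*mark)(T*)) {
      T* ref = *root;
      if (self->is_gc_marking) {
        ref = mark(ref);  // pay for the call only while the GC is marking
      }
      return ref;
    }
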
+
 #undef __
 #undef QUICK_ENTRY_POINT
 
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 881afcc..0e90ac6 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -215,6 +215,7 @@
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* instr);
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
@@ -257,6 +258,7 @@
   void HandleFieldSet(HInstruction* instruction);
   void HandleFieldGet(HInstruction* instruction);
   void HandleInvoke(HInvoke* instr);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* instr);
 
   CodeGeneratorARM64* const codegen_;
@@ -424,6 +426,51 @@
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  // Generate a read barrier for a heap reference within `instruction`.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrier(HInstruction* instruction,
+                           Location out,
+                           Location ref,
+                           Location obj,
+                           uint32_t offset,
+                           Location index = Location::NoLocation());
+
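
For array accesses, the `index` adjustment mentioned above plausibly folds into the single flat offset the entry point accepts; a sketch under that assumption (the element size and naming are illustrative, not quoted from ART):

    #include <cstdint>

    // Assumes compressed 32-bit heap references.
    constexpr uint32_t kHeapReferenceSize = sizeof(uint32_t);

    uint32_t FlattenArrayOffset(uint32_t data_offset, uint32_t index) {
      return data_offset + index * kHeapReferenceSize;  // &obj[index] as obj + offset
    }
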
+  // If read barriers are enabled, generate a read barrier for a heap reference.
+  // If heap poisoning is enabled, also unpoison the reference in `out`.
+  void MaybeGenerateReadBarrier(HInstruction* instruction,
+                                Location out,
+                                Location ref,
+                                Location obj,
+                                uint32_t offset,
+                                Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction`.
+  //
+  // A read barrier for an object reference GC root is implemented as
+  // a call to the artReadBarrierForRootSlow runtime entry point,
+  // which is passed the value in location `root`:
+  //
+  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierForRootSlow.
+  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+
  private:
   using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>;
   using MethodToLiteralMap = ArenaSafeMap<MethodReference,
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 919ed2d..07efdee 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -415,13 +415,11 @@
                                   dex_pc,
                                   this,
                                   IsDirectEntrypoint(kQuickInstanceofNonTrivial));
+      CheckEntrypointTypes<
+          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
-      CheckEntrypointTypes<kQuickInstanceofNonTrivial,
-                           uint32_t,
-                           const mirror::Class*,
-                           const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
@@ -461,6 +459,7 @@
                                 dex_pc,
                                 this,
                                 IsDirectEntrypoint(kQuickDeoptimize));
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; }
@@ -1957,11 +1956,8 @@
 void LocationsBuilderMIPS::VisitCompare(HCompare* compare) {
   Primitive::Type in_type = compare->InputAt(0)->GetType();
 
-  LocationSummary::CallKind call_kind = Primitive::IsFloatingPointType(in_type)
-      ? LocationSummary::kCall
-      : LocationSummary::kNoCall;
-
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, call_kind);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
 
   switch (in_type) {
     case Primitive::kPrimLong:
@@ -1972,13 +1968,11 @@
       break;
 
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      InvokeRuntimeCallingConvention calling_convention;
-      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
-      locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
-      locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
-    }
 
     default:
       LOG(FATAL) << "Unexpected type for compare operation " << in_type;
@@ -1987,7 +1981,10 @@
 
 void InstructionCodeGeneratorMIPS::VisitCompare(HCompare* instruction) {
   LocationSummary* locations = instruction->GetLocations();
+  Register res = locations->Out().AsRegister<Register>();
   Primitive::Type in_type = instruction->InputAt(0)->GetType();
+  bool gt_bias = instruction->IsGtBias();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
 
   //  0 if: left == right
   //  1 if: left  > right
@@ -1995,7 +1992,6 @@
   switch (in_type) {
     case Primitive::kPrimLong: {
       MipsLabel done;
-      Register res = locations->Out().AsRegister<Register>();
       Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
       Register lhs_low  = locations->InAt(0).AsRegisterPairLow<Register>();
       Register rhs_high = locations->InAt(1).AsRegisterPairHigh<Register>();
@@ -2012,45 +2008,82 @@
       break;
     }
 
-    case Primitive::kPrimFloat:
+    case Primitive::kPrimFloat: {
+      FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
+      FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
+      MipsLabel done;
+      if (isR6) {
+        __ CmpEqS(FTMP, lhs, rhs);
+        __ LoadConst32(res, 0);
+        __ Bc1nez(FTMP, &done);
+        if (gt_bias) {
+          __ CmpLtS(FTMP, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Bc1nez(FTMP, &done);
+          __ LoadConst32(res, 1);
+        } else {
+          __ CmpLtS(FTMP, rhs, lhs);
+          __ LoadConst32(res, 1);
+          __ Bc1nez(FTMP, &done);
+          __ LoadConst32(res, -1);
+        }
+      } else {
+        if (gt_bias) {
+          __ ColtS(0, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Bc1t(0, &done);
+          __ CeqS(0, lhs, rhs);
+          __ LoadConst32(res, 1);
+          __ Movt(res, ZERO, 0);
+        } else {
+          __ ColtS(0, rhs, lhs);
+          __ LoadConst32(res, 1);
+          __ Bc1t(0, &done);
+          __ CeqS(0, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Movt(res, ZERO, 0);
+        }
+      }
+      __ Bind(&done);
+      break;
+    }
     case Primitive::kPrimDouble: {
-      int32_t entry_point_offset;
-      bool direct;
-      if (in_type == Primitive::kPrimFloat) {
-        if (instruction->IsGtBias()) {
-          entry_point_offset = QUICK_ENTRY_POINT(pCmpgFloat);
-          direct = IsDirectEntrypoint(kQuickCmpgFloat);
+      FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
+      FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
+      MipsLabel done;
+      if (isR6) {
+        __ CmpEqD(FTMP, lhs, rhs);
+        __ LoadConst32(res, 0);
+        __ Bc1nez(FTMP, &done);
+        if (gt_bias) {
+          __ CmpLtD(FTMP, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Bc1nez(FTMP, &done);
+          __ LoadConst32(res, 1);
         } else {
-          entry_point_offset = QUICK_ENTRY_POINT(pCmplFloat);
-          direct = IsDirectEntrypoint(kQuickCmplFloat);
+          __ CmpLtD(FTMP, rhs, lhs);
+          __ LoadConst32(res, 1);
+          __ Bc1nez(FTMP, &done);
+          __ LoadConst32(res, -1);
         }
       } else {
-        if (instruction->IsGtBias()) {
-          entry_point_offset = QUICK_ENTRY_POINT(pCmpgDouble);
-          direct = IsDirectEntrypoint(kQuickCmpgDouble);
+        if (gt_bias) {
+          __ ColtD(0, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Bc1t(0, &done);
+          __ CeqD(0, lhs, rhs);
+          __ LoadConst32(res, 1);
+          __ Movt(res, ZERO, 0);
         } else {
-          entry_point_offset = QUICK_ENTRY_POINT(pCmplDouble);
-          direct = IsDirectEntrypoint(kQuickCmplDouble);
+          __ ColtD(0, rhs, lhs);
+          __ LoadConst32(res, 1);
+          __ Bc1t(0, &done);
+          __ CeqD(0, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Movt(res, ZERO, 0);
         }
       }
-      codegen_->InvokeRuntime(entry_point_offset,
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr,
-                              direct);
-      if (in_type == Primitive::kPrimFloat) {
-        if (instruction->IsGtBias()) {
-          CheckEntrypointTypes<kQuickCmpgFloat, int32_t, float, float>();
-        } else {
-          CheckEntrypointTypes<kQuickCmplFloat, int32_t, float, float>();
-        }
-      } else {
-        if (instruction->IsGtBias()) {
-          CheckEntrypointTypes<kQuickCmpgDouble, int32_t, double, double>();
-        } else {
-          CheckEntrypointTypes<kQuickCmplDouble, int32_t, double, double>();
-        }
-      }
+      __ Bind(&done);
       break;
     }
 
@@ -2059,163 +2092,233 @@
   }
 }
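
Both the R6 and pre-R6 sequences above compute the same three-way result; the bias only matters when an input is NaN. A C++ reference model of the intended semantics (sketch):

    #include <cmath>

    // gt_bias: NaN compares as "greater" (cmpg semantics),
    // otherwise NaN compares as "less" (cmpl semantics).
    int FpCompare(double lhs, double rhs, bool gt_bias) {
      if (lhs == rhs) return 0;           // CmpEqD / CeqD branch
      if (std::isnan(lhs) || std::isnan(rhs)) {
        return gt_bias ? 1 : -1;          // unordered: result fixed by the bias
      }
      return (lhs < rhs) ? -1 : 1;
    }
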
 
-void LocationsBuilderMIPS::VisitCondition(HCondition* instruction) {
+void LocationsBuilderMIPS::HandleCondition(HCondition* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  switch (instruction->InputAt(0)->GetType()) {
+    default:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      break;
+  }
   if (instruction->NeedsMaterialization()) {
     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 }
 
-void InstructionCodeGeneratorMIPS::VisitCondition(HCondition* instruction) {
+void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) {
   if (!instruction->NeedsMaterialization()) {
     return;
   }
-  // TODO: generalize to long
-  DCHECK_NE(instruction->InputAt(0)->GetType(), Primitive::kPrimLong);
 
+  Primitive::Type type = instruction->InputAt(0)->GetType();
   LocationSummary* locations = instruction->GetLocations();
   Register dst = locations->Out().AsRegister<Register>();
+  MipsLabel true_label;
 
-  Register lhs = locations->InAt(0).AsRegister<Register>();
-  Location rhs_location = locations->InAt(1);
+  switch (type) {
+    default:
+      // Integer case.
+      GenerateIntCompare(instruction->GetCondition(), locations);
+      return;
 
-  Register rhs_reg = ZERO;
-  int64_t rhs_imm = 0;
-  bool use_imm = rhs_location.IsConstant();
-  if (use_imm) {
-    rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
-  } else {
-    rhs_reg = rhs_location.AsRegister<Register>();
+    case Primitive::kPrimLong:
+      // TODO: don't use branches.
+      GenerateLongCompareAndBranch(instruction->GetCondition(), locations, &true_label);
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      // TODO: don't use branches.
+      GenerateFpCompareAndBranch(instruction->GetCondition(),
+                                 instruction->IsGtBias(),
+                                 type,
+                                 locations,
+                                 &true_label);
+      break;
   }
 
-  IfCondition if_cond = instruction->GetCondition();
+  // Convert the branches into the result.
+  MipsLabel done;
 
-  switch (if_cond) {
-    case kCondEQ:
-    case kCondNE:
-      if (use_imm && IsUint<16>(rhs_imm)) {
-        __ Xori(dst, lhs, rhs_imm);
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Xor(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondEQ) {
-        __ Sltiu(dst, dst, 1);
-      } else {
-        __ Sltu(dst, ZERO, dst);
-      }
-      break;
+  // False case: result = 0.
+  __ LoadConst32(dst, 0);
+  __ B(&done);
 
-    case kCondLT:
-    case kCondGE:
-      if (use_imm && IsInt<16>(rhs_imm)) {
-        __ Slti(dst, lhs, rhs_imm);
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Slt(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondGE) {
-        // Simulate lhs >= rhs via !(lhs < rhs) since there's
-        // only the slt instruction but no sge.
-        __ Xori(dst, dst, 1);
-      }
-      break;
+  // True case: result = 1.
+  __ Bind(&true_label);
+  __ LoadConst32(dst, 1);
+  __ Bind(&done);
+}
 
-    case kCondLE:
-    case kCondGT:
-      if (use_imm && IsInt<16>(rhs_imm + 1)) {
-        // Simulate lhs <= rhs via lhs < rhs + 1.
-        __ Slti(dst, lhs, rhs_imm + 1);
-        if (if_cond == kCondGT) {
-          // Simulate lhs > rhs via !(lhs <= rhs) since there's
-          // only the slti instruction but no sgti.
-          __ Xori(dst, dst, 1);
-        }
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Slt(dst, rhs_reg, lhs);
-        if (if_cond == kCondLE) {
-          // Simulate lhs <= rhs via !(rhs < lhs) since there's
-          // only the slt instruction but no sle.
-          __ Xori(dst, dst, 1);
-        }
-      }
-      break;
+void InstructionCodeGeneratorMIPS::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt);
 
-    case kCondB:
-    case kCondAE:
-      // Use sltiu instruction if rhs_imm is in range [0, 32767] or in
-      // [max_unsigned - 32767 = 0xffff8000, max_unsigned = 0xffffffff].
-      if (use_imm &&
-          (IsUint<15>(rhs_imm) ||
-              IsUint<15>(rhs_imm - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(15))))) {
-        if (IsUint<15>(rhs_imm)) {
-          __ Sltiu(dst, lhs, rhs_imm);
-        } else {
-          // 16-bit value (in range [0x8000, 0xffff]) passed to sltiu is sign-extended
-          // and then used as unsigned integer (range [0xffff8000, 0xffffffff]).
-          __ Sltiu(dst, lhs, rhs_imm - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(16)));
-        }
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Sltu(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondAE) {
-        // Simulate lhs >= rhs via !(lhs < rhs) since there's
-        // only the sltu instruction but no sgeu.
-        __ Xori(dst, dst, 1);
-      }
-      break;
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
 
-    case kCondBE:
-    case kCondA:
-      // Use sltiu instruction if rhs_imm is in range [0, 32766] or in
-      // [max_unsigned - 32767 - 1 = 0xffff7fff, max_unsigned - 1 = 0xfffffffe].
-      // lhs <= rhs is simulated via lhs < rhs + 1.
-      if (use_imm && (rhs_imm != -1) &&
-          (IsUint<15>(rhs_imm + 1) ||
-              IsUint<15>(rhs_imm + 1 - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(15))))) {
-        if (IsUint<15>(rhs_imm + 1)) {
-          // Simulate lhs <= rhs via lhs < rhs + 1.
-          __ Sltiu(dst, lhs, rhs_imm + 1);
-        } else {
-          // 16-bit value (in range [0x8000, 0xffff]) passed to sltiu is sign-extended
-          // and then used as unsigned integer (range [0xffff8000, 0xffffffff] where rhs_imm
-          // is in range [0xffff7fff, 0xfffffffe] since lhs <= rhs is simulated via lhs < rhs + 1).
-          __ Sltiu(dst, lhs, rhs_imm + 1 - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(16)));
-        }
-        if (if_cond == kCondA) {
-          // Simulate lhs > rhs via !(lhs <= rhs) since there's
-          // only the sltiu instruction but no sgtiu.
-          __ Xori(dst, dst, 1);
-        }
+  Register out = locations->Out().AsRegister<Register>();
+  Register dividend = locations->InAt(0).AsRegister<Register>();
+  int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+  DCHECK(imm == 1 || imm == -1);
+
+  if (instruction->IsRem()) {
+    __ Move(out, ZERO);
+  } else {
+    if (imm == -1) {
+      __ Subu(out, ZERO, dividend);
+    } else if (out != dividend) {
+      __ Move(out, dividend);
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt);
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  Register out = locations->Out().AsRegister<Register>();
+  Register dividend = locations->InAt(0).AsRegister<Register>();
+  int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+  uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm));
+  DCHECK(IsPowerOfTwo(abs_imm));
+  int ctz_imm = CTZ(abs_imm);
+
+  if (instruction->IsDiv()) {
+    if (ctz_imm == 1) {
+      // Fast path for division by +/-2, which is very common.
+      __ Srl(TMP, dividend, 31);
+    } else {
+      __ Sra(TMP, dividend, 31);
+      __ Srl(TMP, TMP, 32 - ctz_imm);
+    }
+    __ Addu(out, dividend, TMP);
+    __ Sra(out, out, ctz_imm);
+    if (imm < 0) {
+      __ Subu(out, ZERO, out);
+    }
+  } else {
+    if (ctz_imm == 1) {
+      // Fast path for modulo +/-2, which is very common.
+      __ Sra(TMP, dividend, 31);
+      __ Subu(out, dividend, TMP);
+      __ Andi(out, out, 1);
+      __ Addu(out, out, TMP);
+    } else {
+      __ Sra(TMP, dividend, 31);
+      __ Srl(TMP, TMP, 32 - ctz_imm);
+      __ Addu(out, dividend, TMP);
+      if (IsUint<16>(abs_imm - 1)) {
+        __ Andi(out, out, abs_imm - 1);
       } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Sltu(dst, rhs_reg, lhs);
-        if (if_cond == kCondBE) {
-          // Simulate lhs <= rhs via !(rhs < lhs) since there's
-          // only the sltu instruction but no sleu.
-          __ Xori(dst, dst, 1);
-        }
+        __ Sll(out, out, 32 - ctz_imm);
+        __ Srl(out, out, 32 - ctz_imm);
       }
-      break;
+      __ Subu(out, out, TMP);
+    }
+  }
+}
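
The Sra/Srl pair in the div path materializes a 2^k - 1 bias for negative dividends so that the arithmetic shift truncates toward zero, as Java division requires. A reference model (sketch; assumes arithmetic right shift for signed ints, as on MIPS):

    #include <cstdint>

    // ctz_imm = log2(|imm|).
    int32_t DivPow2(int32_t dividend, int ctz_imm, bool negative_divisor) {
      int32_t tmp = dividend >> 31;  // Sra: all-ones if negative, else zero
      tmp = static_cast<int32_t>(
          static_cast<uint32_t>(tmp) >> (32 - ctz_imm));  // Srl: 2^k - 1 or 0
      int32_t out = (dividend + tmp) >> ctz_imm;  // Addu + Sra: truncate toward zero
      return negative_divisor ? -out : out;       // Subu out, ZERO, out
    }
    // Example: DivPow2(-7, 1, false) == -3, matching Java's -7 / 2.
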
+
+void InstructionCodeGeneratorMIPS::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt);
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  Register out = locations->Out().AsRegister<Register>();
+  Register dividend = locations->InAt(0).AsRegister<Register>();
+  int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+
+  int64_t magic;
+  int shift;
+  CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
+
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+
+  __ LoadConst32(TMP, magic);
+  if (isR6) {
+    __ MuhR6(TMP, dividend, TMP);
+  } else {
+    __ MultR2(dividend, TMP);
+    __ Mfhi(TMP);
+  }
+  if (imm > 0 && magic < 0) {
+    __ Addu(TMP, TMP, dividend);
+  } else if (imm < 0 && magic > 0) {
+    __ Subu(TMP, TMP, dividend);
+  }
+
+  if (shift != 0) {
+    __ Sra(TMP, TMP, shift);
+  }
+
+  if (instruction->IsDiv()) {
+    __ Sra(out, TMP, 31);
+    __ Subu(out, TMP, out);
+  } else {
+    __ Sra(AT, TMP, 31);
+    __ Subu(AT, TMP, AT);
+    __ LoadConst32(TMP, imm);
+    if (isR6) {
+      __ MulR6(TMP, AT, TMP);
+    } else {
+      __ MulR2(TMP, AT, TMP);
+    }
+    __ Subu(out, dividend, TMP);
+  }
+}
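
The constant-divisor sequence is the classic Hacker's Delight magic-number division: take the high 32 bits of a 64-bit multiply, correct for the operand signs, shift, and add back the sign bit. A C++ model of the div path (sketch; magic and shift are taken as precomputed inputs from CalculateMagicAndShiftForDivRem):

    #include <cstdint>

    int32_t MagicDiv(int32_t dividend, int32_t imm, int32_t magic, int shift) {
      // MuhR6 / MultR2+Mfhi: high 32 bits of the 64-bit product.
      int32_t tmp = static_cast<int32_t>(
          (static_cast<int64_t>(dividend) * magic) >> 32);
      if (imm > 0 && magic < 0) {
        tmp += dividend;  // Addu fix-up
      } else if (imm < 0 && magic > 0) {
        tmp -= dividend;  // Subu fix-up
      }
      if (shift != 0) {
        tmp >>= shift;  // Sra
      }
      return tmp - (tmp >> 31);  // Sra + Subu: add 1 when tmp is negative
    }
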
+
+void InstructionCodeGeneratorMIPS::GenerateDivRemIntegral(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt);
+
+  LocationSummary* locations = instruction->GetLocations();
+  Register out = locations->Out().AsRegister<Register>();
+  Location second = locations->InAt(1);
+
+  if (second.IsConstant()) {
+    int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+    if (imm == 0) {
+      // Do not generate anything. DivZeroCheck would prevent any code from being executed.
+    } else if (imm == 1 || imm == -1) {
+      DivRemOneOrMinusOne(instruction);
+    } else if (IsPowerOfTwo(std::abs(imm))) {
+      DivRemByPowerOfTwo(instruction);
+    } else {
+      DCHECK(imm <= -2 || imm >= 2);
+      GenerateDivRemWithAnyConstant(instruction);
+    }
+  } else {
+    Register dividend = locations->InAt(0).AsRegister<Register>();
+    Register divisor = second.AsRegister<Register>();
+    bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+    if (instruction->IsDiv()) {
+      if (isR6) {
+        __ DivR6(out, dividend, divisor);
+      } else {
+        __ DivR2(out, dividend, divisor);
+      }
+    } else {
+      if (isR6) {
+        __ ModR6(out, dividend, divisor);
+      } else {
+        __ ModR2(out, dividend, divisor);
+      }
+    }
   }
 }
 
@@ -2230,7 +2333,7 @@
   switch (type) {
     case Primitive::kPrimInt:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -2259,20 +2362,11 @@
 void InstructionCodeGeneratorMIPS::VisitDiv(HDiv* instruction) {
   Primitive::Type type = instruction->GetType();
   LocationSummary* locations = instruction->GetLocations();
-  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
 
   switch (type) {
-    case Primitive::kPrimInt: {
-      Register dst = locations->Out().AsRegister<Register>();
-      Register lhs = locations->InAt(0).AsRegister<Register>();
-      Register rhs = locations->InAt(1).AsRegister<Register>();
-      if (isR6) {
-        __ DivR6(dst, lhs, rhs);
-      } else {
-        __ DivR2(dst, lhs, rhs);
-      }
+    case Primitive::kPrimInt:
+      GenerateDivRemIntegral(instruction);
       break;
-    }
     case Primitive::kPrimLong: {
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv),
                               instruction,
@@ -2419,6 +2513,627 @@
   }
 }
 
+void InstructionCodeGeneratorMIPS::GenerateIntCompare(IfCondition cond,
+                                                      LocationSummary* locations) {
+  Register dst = locations->Out().AsRegister<Register>();
+  Register lhs = locations->InAt(0).AsRegister<Register>();
+  Location rhs_location = locations->InAt(1);
+  Register rhs_reg = ZERO;
+  int64_t rhs_imm = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+  } else {
+    rhs_reg = rhs_location.AsRegister<Register>();
+  }
+
+  switch (cond) {
+    case kCondEQ:
+    case kCondNE:
+      if (use_imm && IsUint<16>(rhs_imm)) {
+        __ Xori(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Xor(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondEQ) {
+        __ Sltiu(dst, dst, 1);
+      } else {
+        __ Sltu(dst, ZERO, dst);
+      }
+      break;
+
+    case kCondLT:
+    case kCondGE:
+      if (use_imm && IsInt<16>(rhs_imm)) {
+        __ Slti(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondGE) {
+        // Simulate lhs >= rhs via !(lhs < rhs) since there's
+        // only the slt instruction but no sge.
+        __ Xori(dst, dst, 1);
+      }
+      break;
+
+    case kCondLE:
+    case kCondGT:
+      if (use_imm && IsInt<16>(rhs_imm + 1)) {
+        // Simulate lhs <= rhs via lhs < rhs + 1.
+        __ Slti(dst, lhs, rhs_imm + 1);
+        if (cond == kCondGT) {
+          // Simulate lhs > rhs via !(lhs <= rhs) since there's
+          // only the slti instruction but no sgti.
+          __ Xori(dst, dst, 1);
+        }
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, rhs_reg, lhs);
+        if (cond == kCondLE) {
+          // Simulate lhs <= rhs via !(rhs < lhs) since there's
+          // only the slt instruction but no sle.
+          __ Xori(dst, dst, 1);
+        }
+      }
+      break;
+
+    case kCondB:
+    case kCondAE:
+      if (use_imm && IsInt<16>(rhs_imm)) {
+        // Sltiu sign-extends its 16-bit immediate operand before
+        // the comparison and thus lets us compare directly with
+        // unsigned values in the ranges [0, 0x7fff] and
+        // [0xffff8000, 0xffffffff].
+        __ Sltiu(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondAE) {
+        // Simulate lhs >= rhs via !(lhs < rhs) since there's
+        // only the sltu instruction but no sgeu.
+        __ Xori(dst, dst, 1);
+      }
+      break;
+
+    case kCondBE:
+    case kCondA:
+      if (use_imm && (rhs_imm != -1) && IsInt<16>(rhs_imm + 1)) {
+        // Simulate lhs <= rhs via lhs < rhs + 1.
+        // Note that this only works if rhs + 1 does not overflow
+        // to 0, hence the check above.
+        // Sltiu sign-extends its 16-bit immediate operand before
+        // the comparison and thus lets us compare directly with
+        // unsigned values in the ranges [0, 0x7fff] and
+        // [0xffff8000, 0xffffffff].
+        __ Sltiu(dst, lhs, rhs_imm + 1);
+        if (cond == kCondA) {
+          // Simulate lhs > rhs via !(lhs <= rhs) since there's
+          // only the sltiu instruction but no sgtiu.
+          __ Xori(dst, dst, 1);
+        }
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, rhs_reg, lhs);
+        if (cond == kCondBE) {
+          // Simulate lhs <= rhs via !(rhs < lhs) since there's
+          // only the sltu instruction but no sleu.
+          __ Xori(dst, dst, 1);
+        }
+      }
+      break;
+  }
+}
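
GenerateIntCompare reduces every condition to slt/sltu plus an optional Xori negation, leaning on a handful of identities; a small C++ check of those identities (sketch):

    #include <cassert>
    #include <cstdint>

    // slt/sltu produce 0 or 1; Xori dst, dst, 1 is the final negation.
    void CheckCompareIdentities(int32_t a, int32_t b) {
      assert((a >= b) == !(a < b));           // kCondGE: Slt + Xori
      assert((a > b)  == (b < a));            // kCondGT: operands swapped
      if (b != INT32_MAX) {
        assert((a <= b) == (a < b + 1));      // kCondLE immediate form: Slti imm+1
      }
      uint32_t ua = static_cast<uint32_t>(a);
      uint32_t ub = static_cast<uint32_t>(b);
      assert((ua >= ub) == !(ua < ub));       // kCondAE: Sltu + Xori
      if (ub != UINT32_MAX) {
        assert((ua <= ub) == (ua < ub + 1));  // kCondBE: Sltiu imm+1, hence
      }                                       // the rhs_imm != -1 guard above
    }
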
+
+void InstructionCodeGeneratorMIPS::GenerateIntCompareAndBranch(IfCondition cond,
+                                                               LocationSummary* locations,
+                                                               MipsLabel* label) {
+  Register lhs = locations->InAt(0).AsRegister<Register>();
+  Location rhs_location = locations->InAt(1);
+  Register rhs_reg = ZERO;
+  int32_t rhs_imm = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+  } else {
+    rhs_reg = rhs_location.AsRegister<Register>();
+  }
+
+  if (use_imm && rhs_imm == 0) {
+    switch (cond) {
+      case kCondEQ:
+      case kCondBE:  // <= 0 if zero
+        __ Beqz(lhs, label);
+        break;
+      case kCondNE:
+      case kCondA:  // > 0 if non-zero
+        __ Bnez(lhs, label);
+        break;
+      case kCondLT:
+        __ Bltz(lhs, label);
+        break;
+      case kCondGE:
+        __ Bgez(lhs, label);
+        break;
+      case kCondLE:
+        __ Blez(lhs, label);
+        break;
+      case kCondGT:
+        __ Bgtz(lhs, label);
+        break;
+      case kCondB:  // always false
+        break;
+      case kCondAE:  // always true
+        __ B(label);
+        break;
+    }
+  } else {
+    if (use_imm) {
+      // TODO: more efficient comparison with 16-bit constants without loading them into TMP.
+      rhs_reg = TMP;
+      __ LoadConst32(rhs_reg, rhs_imm);
+    }
+    switch (cond) {
+      case kCondEQ:
+        __ Beq(lhs, rhs_reg, label);
+        break;
+      case kCondNE:
+        __ Bne(lhs, rhs_reg, label);
+        break;
+      case kCondLT:
+        __ Blt(lhs, rhs_reg, label);
+        break;
+      case kCondGE:
+        __ Bge(lhs, rhs_reg, label);
+        break;
+      case kCondLE:
+        __ Bge(rhs_reg, lhs, label);
+        break;
+      case kCondGT:
+        __ Blt(rhs_reg, lhs, label);
+        break;
+      case kCondB:
+        __ Bltu(lhs, rhs_reg, label);
+        break;
+      case kCondAE:
+        __ Bgeu(lhs, rhs_reg, label);
+        break;
+      case kCondBE:
+        __ Bgeu(rhs_reg, lhs, label);
+        break;
+      case kCondA:
+        __ Bltu(rhs_reg, lhs, label);
+        break;
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateLongCompareAndBranch(IfCondition cond,
+                                                                LocationSummary* locations,
+                                                                MipsLabel* label) {
+  Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+  Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>();
+  Location rhs_location = locations->InAt(1);
+  Register rhs_high = ZERO;
+  Register rhs_low = ZERO;
+  int64_t imm = 0;
+  uint32_t imm_high = 0;
+  uint32_t imm_low = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    imm = rhs_location.GetConstant()->AsLongConstant()->GetValue();
+    imm_high = High32Bits(imm);
+    imm_low = Low32Bits(imm);
+  } else {
+    rhs_high = rhs_location.AsRegisterPairHigh<Register>();
+    rhs_low = rhs_location.AsRegisterPairLow<Register>();
+  }
+
+  if (use_imm && imm == 0) {
+    switch (cond) {
+      case kCondEQ:
+      case kCondBE:  // <= 0 if zero
+        __ Or(TMP, lhs_high, lhs_low);
+        __ Beqz(TMP, label);
+        break;
+      case kCondNE:
+      case kCondA:  // > 0 if non-zero
+        __ Or(TMP, lhs_high, lhs_low);
+        __ Bnez(TMP, label);
+        break;
+      case kCondLT:
+        __ Bltz(lhs_high, label);
+        break;
+      case kCondGE:
+        __ Bgez(lhs_high, label);
+        break;
+      case kCondLE:
+        __ Or(TMP, lhs_high, lhs_low);
+        __ Sra(AT, lhs_high, 31);
+        __ Bgeu(AT, TMP, label);
+        break;
+      case kCondGT:
+        __ Or(TMP, lhs_high, lhs_low);
+        __ Sra(AT, lhs_high, 31);
+        __ Bltu(AT, TMP, label);
+        break;
+      case kCondB:  // always false
+        break;
+      case kCondAE:  // always true
+        __ B(label);
+        break;
+    }
+  } else if (use_imm) {
+    // TODO: more efficient comparison with constants without loading them into TMP/AT.
+    switch (cond) {
+      case kCondEQ:
+        __ LoadConst32(TMP, imm_high);
+        __ Xor(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Xor(AT, AT, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondNE:
+        __ LoadConst32(TMP, imm_high);
+        __ Xor(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Xor(AT, AT, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Bnez(TMP, label);
+        break;
+      case kCondLT:
+        __ LoadConst32(TMP, imm_high);
+        __ Blt(lhs_high, TMP, label);
+        __ Slt(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, lhs_low, AT);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondGE:
+        __ LoadConst32(TMP, imm_high);
+        __ Blt(TMP, lhs_high, label);
+        __ Slt(TMP, lhs_high, TMP);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, lhs_low, AT);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondLE:
+        __ LoadConst32(TMP, imm_high);
+        __ Blt(lhs_high, TMP, label);
+        __ Slt(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, AT, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondGT:
+        __ LoadConst32(TMP, imm_high);
+        __ Blt(TMP, lhs_high, label);
+        __ Slt(TMP, lhs_high, TMP);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, AT, lhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondB:
+        __ LoadConst32(TMP, imm_high);
+        __ Bltu(lhs_high, TMP, label);
+        __ Sltu(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, lhs_low, AT);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondAE:
+        __ LoadConst32(TMP, imm_high);
+        __ Bltu(TMP, lhs_high, label);
+        __ Sltu(TMP, lhs_high, TMP);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, lhs_low, AT);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondBE:
+        __ LoadConst32(TMP, imm_high);
+        __ Bltu(lhs_high, TMP, label);
+        __ Sltu(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, AT, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondA:
+        __ LoadConst32(TMP, imm_high);
+        __ Bltu(TMP, lhs_high, label);
+        __ Sltu(TMP, lhs_high, TMP);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, AT, lhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+    }
+  } else {
+    switch (cond) {
+      case kCondEQ:
+        __ Xor(TMP, lhs_high, rhs_high);
+        __ Xor(AT, lhs_low, rhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondNE:
+        __ Xor(TMP, lhs_high, rhs_high);
+        __ Xor(AT, lhs_low, rhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Bnez(TMP, label);
+        break;
+      case kCondLT:
+        __ Blt(lhs_high, rhs_high, label);
+        __ Slt(TMP, rhs_high, lhs_high);
+        __ Sltu(AT, lhs_low, rhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondGE:
+        __ Blt(rhs_high, lhs_high, label);
+        __ Slt(TMP, lhs_high, rhs_high);
+        __ Sltu(AT, lhs_low, rhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondLE:
+        __ Blt(lhs_high, rhs_high, label);
+        __ Slt(TMP, rhs_high, lhs_high);
+        __ Sltu(AT, rhs_low, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondGT:
+        __ Blt(rhs_high, lhs_high, label);
+        __ Slt(TMP, lhs_high, rhs_high);
+        __ Sltu(AT, rhs_low, lhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondB:
+        __ Bltu(lhs_high, rhs_high, label);
+        __ Sltu(TMP, rhs_high, lhs_high);
+        __ Sltu(AT, lhs_low, rhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondAE:
+        __ Bltu(rhs_high, lhs_high, label);
+        __ Sltu(TMP, lhs_high, rhs_high);
+        __ Sltu(AT, lhs_low, rhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondBE:
+        __ Bltu(lhs_high, rhs_high, label);
+        __ Sltu(TMP, rhs_high, lhs_high);
+        __ Sltu(AT, rhs_low, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondA:
+        __ Bltu(rhs_high, lhs_high, label);
+        __ Sltu(TMP, lhs_high, rhs_high);
+        __ Sltu(AT, rhs_low, lhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+    }
+  }
+}
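
Each 64-bit relation above decomposes into a signed compare of the high words with an unsigned compare of the low words as the tie-break. A reference model for the less-than case (sketch):

    #include <cstdint>

    bool LongLessThan(int32_t lhs_hi, uint32_t lhs_lo,
                      int32_t rhs_hi, uint32_t rhs_lo) {
      if (lhs_hi != rhs_hi) {
        return lhs_hi < rhs_hi;  // Blt on the high words decides it
      }
      return lhs_lo < rhs_lo;    // Sltu on the low words breaks the tie
    }
    // The emitted kCondLT fuses the tie-break: after the first Blt falls
    // through, Blt TMP, AT branches iff TMP == 0 (high words equal) and
    // AT == 1 (lhs_lo unsigned-below rhs_lo).
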
+
+void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond,
+                                                              bool gt_bias,
+                                                              Primitive::Type type,
+                                                              LocationSummary* locations,
+                                                              MipsLabel* label) {
+  FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
+  FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+  if (type == Primitive::kPrimFloat) {
+    if (isR6) {
+      switch (cond) {
+        case kCondEQ:
+          __ CmpEqS(FTMP, lhs, rhs);
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondNE:
+          __ CmpEqS(FTMP, lhs, rhs);
+          __ Bc1eqz(FTMP, label);
+          break;
+        case kCondLT:
+          if (gt_bias) {
+            __ CmpLtS(FTMP, lhs, rhs);
+          } else {
+            __ CmpUltS(FTMP, lhs, rhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondLE:
+          if (gt_bias) {
+            __ CmpLeS(FTMP, lhs, rhs);
+          } else {
+            __ CmpUleS(FTMP, lhs, rhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondGT:
+          if (gt_bias) {
+            __ CmpUltS(FTMP, rhs, lhs);
+          } else {
+            __ CmpLtS(FTMP, rhs, lhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondGE:
+          if (gt_bias) {
+            __ CmpUleS(FTMP, rhs, lhs);
+          } else {
+            __ CmpLeS(FTMP, rhs, lhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        default:
+          LOG(FATAL) << "Unexpected non-floating-point condition";
+      }
+    } else {
+      switch (cond) {
+        case kCondEQ:
+          __ CeqS(0, lhs, rhs);
+          __ Bc1t(0, label);
+          break;
+        case kCondNE:
+          __ CeqS(0, lhs, rhs);
+          __ Bc1f(0, label);
+          break;
+        case kCondLT:
+          if (gt_bias) {
+            __ ColtS(0, lhs, rhs);
+          } else {
+            __ CultS(0, lhs, rhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        case kCondLE:
+          if (gt_bias) {
+            __ ColeS(0, lhs, rhs);
+          } else {
+            __ CuleS(0, lhs, rhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        case kCondGT:
+          if (gt_bias) {
+            __ CultS(0, rhs, lhs);
+          } else {
+            __ ColtS(0, rhs, lhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        case kCondGE:
+          if (gt_bias) {
+            __ CuleS(0, rhs, lhs);
+          } else {
+            __ ColeS(0, rhs, lhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        default:
+          LOG(FATAL) << "Unexpected non-floating-point condition";
+      }
+    }
+  } else {
+    DCHECK_EQ(type, Primitive::kPrimDouble);
+    if (isR6) {
+      switch (cond) {
+        case kCondEQ:
+          __ CmpEqD(FTMP, lhs, rhs);
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondNE:
+          __ CmpEqD(FTMP, lhs, rhs);
+          __ Bc1eqz(FTMP, label);
+          break;
+        case kCondLT:
+          if (gt_bias) {
+            __ CmpLtD(FTMP, lhs, rhs);
+          } else {
+            __ CmpUltD(FTMP, lhs, rhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondLE:
+          if (gt_bias) {
+            __ CmpLeD(FTMP, lhs, rhs);
+          } else {
+            __ CmpUleD(FTMP, lhs, rhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondGT:
+          if (gt_bias) {
+            __ CmpUltD(FTMP, rhs, lhs);
+          } else {
+            __ CmpLtD(FTMP, rhs, lhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondGE:
+          if (gt_bias) {
+            __ CmpUleD(FTMP, rhs, lhs);
+          } else {
+            __ CmpLeD(FTMP, rhs, lhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        default:
+          LOG(FATAL) << "Unexpected non-floating-point condition";
+      }
+    } else {
+      switch (cond) {
+        case kCondEQ:
+          __ CeqD(0, lhs, rhs);
+          __ Bc1t(0, label);
+          break;
+        case kCondNE:
+          __ CeqD(0, lhs, rhs);
+          __ Bc1f(0, label);
+          break;
+        case kCondLT:
+          if (gt_bias) {
+            __ ColtD(0, lhs, rhs);
+          } else {
+            __ CultD(0, lhs, rhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        case kCondLE:
+          if (gt_bias) {
+            __ ColeD(0, lhs, rhs);
+          } else {
+            __ CuleD(0, lhs, rhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        case kCondGT:
+          if (gt_bias) {
+            __ CultD(0, rhs, lhs);
+          } else {
+            __ ColtD(0, rhs, lhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        case kCondGE:
+          if (gt_bias) {
+            __ CuleD(0, rhs, lhs);
+          } else {
+            __ ColeD(0, rhs, lhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        default:
+          LOG(FATAL) << "Unexpected non-floating-point condition";
+      }
+    }
+  }
+}
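
The bias selects between ordered and unordered FP compares so that NaN lands on the correct side of each branch. A C++ model of the branch-taken predicate (sketch; the IfCondition subset is redeclared here for self-containment):

    #include <cmath>

    enum IfCondition { kCondEQ, kCondNE, kCondLT, kCondLE, kCondGT, kCondGE };

    bool FpBranchTaken(IfCondition cond, bool gt_bias, double lhs, double rhs) {
      bool unordered = std::isnan(lhs) || std::isnan(rhs);
      switch (cond) {
        case kCondEQ: return !unordered && lhs == rhs;
        case kCondNE: return unordered || lhs != rhs;  // CmpEq + Bc1eqz
        case kCondLT: return unordered ? !gt_bias : lhs < rhs;
        case kCondLE: return unordered ? !gt_bias : lhs <= rhs;
        case kCondGT: return unordered ? gt_bias : lhs > rhs;
        case kCondGE: return unordered ? gt_bias : lhs >= rhs;
      }
      return false;
    }
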
+
 void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instruction,
                                                          size_t condition_input_index,
                                                          MipsLabel* true_target,
@@ -2455,7 +3170,7 @@
     // The condition instruction has been materialized, compare the output to 0.
     Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
     DCHECK(cond_val.IsRegister());
-      if (true_target == nullptr) {
+    if (true_target == nullptr) {
       __ Beqz(cond_val.AsRegister<Register>(), false_target);
     } else {
       __ Bnez(cond_val.AsRegister<Register>(), true_target);
@@ -2464,98 +3179,27 @@
     // The condition instruction has not been materialized, use its inputs as
     // the comparison and its condition as the branch condition.
     HCondition* condition = cond->AsCondition();
+    Primitive::Type type = condition->InputAt(0)->GetType();
+    LocationSummary* locations = cond->GetLocations();
+    IfCondition if_cond = condition->GetCondition();
+    MipsLabel* branch_target = true_target;
 
-    Register lhs = condition->GetLocations()->InAt(0).AsRegister<Register>();
-    Location rhs_location = condition->GetLocations()->InAt(1);
-    Register rhs_reg = ZERO;
-    int32_t rhs_imm = 0;
-    bool use_imm = rhs_location.IsConstant();
-    if (use_imm) {
-      rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
-    } else {
-      rhs_reg = rhs_location.AsRegister<Register>();
-    }
-
-    IfCondition if_cond;
-    MipsLabel* non_fallthrough_target;
     if (true_target == nullptr) {
       if_cond = condition->GetOppositeCondition();
-      non_fallthrough_target = false_target;
-    } else {
-      if_cond = condition->GetCondition();
-      non_fallthrough_target = true_target;
+      branch_target = false_target;
     }
 
-    if (use_imm && rhs_imm == 0) {
-      switch (if_cond) {
-        case kCondEQ:
-          __ Beqz(lhs, non_fallthrough_target);
-          break;
-        case kCondNE:
-          __ Bnez(lhs, non_fallthrough_target);
-          break;
-        case kCondLT:
-          __ Bltz(lhs, non_fallthrough_target);
-          break;
-        case kCondGE:
-          __ Bgez(lhs, non_fallthrough_target);
-          break;
-        case kCondLE:
-          __ Blez(lhs, non_fallthrough_target);
-          break;
-        case kCondGT:
-          __ Bgtz(lhs, non_fallthrough_target);
-          break;
-        case kCondB:
-          break;  // always false
-        case kCondBE:
-          __ Beqz(lhs, non_fallthrough_target);  // <= 0 if zero
-          break;
-        case kCondA:
-          __ Bnez(lhs, non_fallthrough_target);  // > 0 if non-zero
-          break;
-        case kCondAE:
-          __ B(non_fallthrough_target);  // always true
-          break;
-      }
-    } else {
-      if (use_imm) {
-        // TODO: more efficient comparison with 16-bit constants without loading them into TMP.
-        rhs_reg = TMP;
-        __ LoadConst32(rhs_reg, rhs_imm);
-      }
-      switch (if_cond) {
-        case kCondEQ:
-          __ Beq(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondNE:
-          __ Bne(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondLT:
-          __ Blt(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondGE:
-          __ Bge(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondLE:
-          __ Bge(rhs_reg, lhs, non_fallthrough_target);
-          break;
-        case kCondGT:
-          __ Blt(rhs_reg, lhs, non_fallthrough_target);
-          break;
-        case kCondB:
-          __ Bltu(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondAE:
-          __ Bgeu(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondBE:
-          __ Bgeu(rhs_reg, lhs, non_fallthrough_target);
-          break;
-        case kCondA:
-          __ Bltu(rhs_reg, lhs, non_fallthrough_target);
-          break;
-      }
+    switch (type) {
+      default:
+        GenerateIntCompareAndBranch(if_cond, locations, branch_target);
+        break;
+      case Primitive::kPrimLong:
+        GenerateLongCompareAndBranch(if_cond, locations, branch_target);
+        break;
+      case Primitive::kPrimFloat:
+      case Primitive::kPrimDouble:
+        GenerateFpCompareAndBranch(if_cond, condition->IsGtBias(), type, locations, branch_target);
+        break;
     }
   }
 
@@ -2600,6 +3244,14 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
 void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
   Primitive::Type field_type = field_info.GetFieldType();
   bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
@@ -2638,6 +3290,7 @@
   Register obj = locations->InAt(0).AsRegister<Register>();
   LoadOperandType load_type = kLoadUnsignedByte;
   bool is_volatile = field_info.IsVolatile();
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -2668,8 +3321,7 @@
 
   if (is_volatile && load_type == kLoadDoubleword) {
     InvokeRuntimeCallingConvention calling_convention;
-    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(),
-               obj, field_info.GetFieldOffset().Uint32Value());
+    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset);
     // Do an implicit null check.
     __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0);
     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
@@ -2692,21 +3344,34 @@
       if (type == Primitive::kPrimLong) {
         DCHECK(locations->Out().IsRegisterPair());
         dst = locations->Out().AsRegisterPairLow<Register>();
+        Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
+        if (obj == dst) {
+          __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ LoadFromOffset(kLoadWord, dst, obj, offset);
+        } else {
+          __ LoadFromOffset(kLoadWord, dst, obj, offset);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize);
+        }
       } else {
         DCHECK(locations->Out().IsRegister());
         dst = locations->Out().AsRegister<Register>();
+        __ LoadFromOffset(load_type, dst, obj, offset);
       }
-      __ LoadFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
     } else {
       DCHECK(locations->Out().IsFpuRegister());
       FRegister dst = locations->Out().AsFpuRegister<FRegister>();
       if (type == Primitive::kPrimFloat) {
-        __ LoadSFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value());
+        __ LoadSFromOffset(dst, obj, offset);
       } else {
-        __ LoadDFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value());
+        __ LoadDFromOffset(dst, obj, offset);
       }
     }
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
+    // Longs are handled earlier.
+    if (type != Primitive::kPrimLong) {
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
+    }
   }
 
   if (is_volatile) {
@@ -2752,6 +3417,7 @@
   Register obj = locations->InAt(0).AsRegister<Register>();
   StoreOperandType store_type = kStoreByte;
   bool is_volatile = field_info.IsVolatile();
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -2782,8 +3448,7 @@
 
   if (is_volatile && store_type == kStoreDoubleword) {
     InvokeRuntimeCallingConvention calling_convention;
-    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(),
-               obj, field_info.GetFieldOffset().Uint32Value());
+    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset);
     // Do an implicit null check.
     __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0);
     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
@@ -2806,21 +3471,28 @@
       if (type == Primitive::kPrimLong) {
         DCHECK(locations->InAt(1).IsRegisterPair());
         src = locations->InAt(1).AsRegisterPairLow<Register>();
+        Register src_high = locations->InAt(1).AsRegisterPairHigh<Register>();
+        __ StoreToOffset(kStoreWord, src, obj, offset);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ StoreToOffset(kStoreWord, src_high, obj, offset + kMipsWordSize);
       } else {
         DCHECK(locations->InAt(1).IsRegister());
         src = locations->InAt(1).AsRegister<Register>();
+        __ StoreToOffset(store_type, src, obj, offset);
       }
-      __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
     } else {
       DCHECK(locations->InAt(1).IsFpuRegister());
       FRegister src = locations->InAt(1).AsFpuRegister<FRegister>();
       if (type == Primitive::kPrimFloat) {
-        __ StoreSToOffset(src, obj, field_info.GetFieldOffset().Uint32Value());
+        __ StoreSToOffset(src, obj, offset);
       } else {
-        __ StoreDToOffset(src, obj, field_info.GetFieldOffset().Uint32Value());
+        __ StoreDToOffset(src, obj, offset);
       }
     }
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
+    // Longs are handled earlier.
+    if (type != Primitive::kPrimLong) {
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
+    }
   }
 
   // TODO: memory barriers?
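Note: on MIPS32 a 64-bit field access is split into two word accesses, so the
implicit null check is recorded on the first access that can fault; the load
path additionally orders the two words so that, when the destination low
register aliases the base register, the base is read before it is clobbered.
The hazard that the obj == dst ordering avoids (illustrative only):

    // Wrong order when dst aliases obj: the first load overwrites the base.
    __ LoadFromOffset(kLoadWord, dst, obj, offset);                       // dst == obj
    __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize);  // stale base!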
@@ -3031,7 +3703,7 @@
                         invoke->GetStringInitOffset());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
@@ -3043,7 +3715,7 @@
       LOG(FATAL) << "Unsupported";
       UNREACHABLE();
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register reg = temp.AsRegister<Register>();
       Register method_reg;
       if (current_method.IsRegister()) {
@@ -3170,6 +3842,7 @@
                             cls->GetDexPc(),
                             nullptr,
                             IsDirectEntrypoint(kQuickInitializeTypeAndVerifyAccess));
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
@@ -3181,21 +3854,26 @@
     __ LoadFromOffset(kLoadWord, out, current_method,
                       ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
-    DCHECK(cls->CanCallRuntime());
     __ LoadFromOffset(kLoadWord, out, current_method,
                       ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value());
     __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
-    SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
-        cls,
-        cls,
-        cls->GetDexPc(),
-        cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ Beqz(out, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
-    } else {
-      __ Bind(slow_path->GetExitLabel());
+
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
+          cls,
+          cls,
+          cls->GetDexPc(),
+          cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+      if (!cls->IsInDexCache()) {
+        __ Beqz(out, slow_path->GetEntryLabel());
+      }
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
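Note: the rewritten HLoadClass path only creates the slow path when it can
actually be taken. The resulting cases (sketch):

    // IsInDexCache() && !MustGenerateClinitCheck(): straight-line loads, no slow path.
    // IsInDexCache() &&  MustGenerateClinitCheck(): slow path kept for the clinit
    //                                               check only; the Beqz null test on
    //                                               the cache entry is skipped.
    // !IsInDexCache():                              Beqz null test plus the full
    //                                               LoadClassSlowPathMIPS.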
@@ -3232,24 +3910,28 @@
 }
 
 void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = load->IsInDexCache()
+      ? LocationSummary::kNoCall
+      : LocationSummary::kCallOnSlowPath;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) {
-  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
-  codegen_->AddSlowPath(slow_path);
-
   LocationSummary* locations = load->GetLocations();
   Register out = locations->Out().AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
   __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
   __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
-  __ Beqz(out, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
+
+  if (!load->IsInDexCache()) {
+    SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
+    codegen_->AddSlowPath(slow_path);
+    __ Beqz(out, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 void LocationsBuilderMIPS::VisitLocal(HLocal* local) {
@@ -3478,17 +4160,12 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
 }
 
 void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
-  InvokeRuntimeCallingConvention calling_convention;
-  Register current_method_register = calling_convention.GetRegisterAt(1);
-  __ Lw(current_method_register, SP, kCurrentMethodStackOffset);
-  // Move an uint16_t value to a register.
-  __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
   codegen_->InvokeRuntime(
       GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(),
       instruction,
@@ -3646,7 +4323,7 @@
   switch (type) {
     case Primitive::kPrimInt:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -3676,21 +4353,11 @@
 
 void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) {
   Primitive::Type type = instruction->GetType();
-  LocationSummary* locations = instruction->GetLocations();
-  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
 
   switch (type) {
-    case Primitive::kPrimInt: {
-      Register dst = locations->Out().AsRegister<Register>();
-      Register lhs = locations->InAt(0).AsRegister<Register>();
-      Register rhs = locations->InAt(1).AsRegister<Register>();
-      if (isR6) {
-        __ ModR6(dst, lhs, rhs);
-      } else {
-        __ ModR2(dst, lhs, rhs);
-      }
+    case Primitive::kPrimInt:
+      GenerateDivRemIntegral(instruction);
       break;
-    }
     case Primitive::kPrimLong: {
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod),
                               instruction,
@@ -3705,7 +4372,7 @@
                               instruction, instruction->GetDexPc(),
                               nullptr,
                               IsDirectEntrypoint(kQuickFmodf));
-      CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+      CheckEntrypointTypes<kQuickFmodf, float, float, float>();
       break;
     }
     case Primitive::kPrimDouble: {
@@ -3713,7 +4380,7 @@
                               instruction, instruction->GetDexPc(),
                               nullptr,
                               IsDirectEntrypoint(kQuickFmod));
-      CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+      CheckEntrypointTypes<kQuickFmod, double, double, double>();
       break;
     }
     default:
@@ -3747,6 +4414,16 @@
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
+void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
 void LocationsBuilderMIPS::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
@@ -4123,83 +4800,83 @@
 }
 
 void LocationsBuilderMIPS::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitFakeString(HFakeString* instruction) {
@@ -4228,19 +4905,31 @@
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
   // Create a set of compare/jumps.
+  Register temp_reg = TMP;
+  __ Addiu32(temp_reg, value_reg, -lower_bound);
+  // Jump to default if the index is negative.
+  // Note: We don't check the case where the index is positive while value < lower_bound,
+  // because in that case index >= num_entries must be true, saving one branch instruction.
+  __ Bltz(temp_reg, codegen_->GetLabelOf(default_block));
+
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int32_t i = 0; i < num_entries; ++i) {
-    int32_t case_value = lower_bound + i;
-    MipsLabel* successor_label = codegen_->GetLabelOf(successors[i]);
-    if (case_value == 0) {
-      __ Beqz(value_reg, successor_label);
-    } else {
-      __ LoadConst32(TMP, case_value);
-      __ Beq(value_reg, TMP, successor_label);
-    }
+  // Jump to successors[0] if value == lower_bound.
+  __ Beqz(temp_reg, codegen_->GetLabelOf(successors[0]));
+  int32_t last_index = 0;
+  for (; num_entries - last_index > 2; last_index += 2) {
+    __ Addiu(temp_reg, temp_reg, -2);
+    // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+    __ Bltz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+    // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+    __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
+  }
+  if (num_entries - last_index == 2) {
+    // Handle the one remaining case_value.
+    __ Addiu(temp_reg, temp_reg, -1);
+    __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
   }
 
-  // Insert the default branch for every other value.
+  // Finally, branch to the default block for any other value.
   if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
     __ B(codegen_->GetLabelOf(default_block));
   }
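Note: the rewritten HPackedSwitch lowering normalizes the switch value once and
then walks the remaining cases two at a time, keeping the running difference in
TMP. A worked example for lower_bound = 10 and num_entries = 4 (illustrative
only; the *_label names stand in for codegen_->GetLabelOf(...)):

    __ Addiu32(TMP, value_reg, -10);
    __ Bltz(TMP, default_label);     // value < 10
    __ Beqz(TMP, successor0_label);  // value == 10
    __ Addiu(TMP, TMP, -2);
    __ Bltz(TMP, successor1_label);  // value == 11
    __ Beqz(TMP, successor2_label);  // value == 12
    __ Addiu(TMP, TMP, -1);
    __ Beqz(TMP, successor3_label);  // value == 13
    __ B(default_label);             // any other value, unless default falls through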
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index e3a2cb4..38302ad 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -185,6 +185,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -220,15 +221,32 @@
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
+  void GenerateIntCompare(IfCondition cond, LocationSummary* locations);
+  void GenerateIntCompareAndBranch(IfCondition cond,
+                                   LocationSummary* locations,
+                                   MipsLabel* label);
+  void GenerateLongCompareAndBranch(IfCondition cond,
+                                    LocationSummary* locations,
+                                    MipsLabel* label);
+  void GenerateFpCompareAndBranch(IfCondition cond,
+                                  bool gt_bias,
+                                  Primitive::Type type,
+                                  LocationSummary* locations,
+                                  MipsLabel* label);
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
                              MipsLabel* true_target,
                              MipsLabel* false_target);
+  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+  void DivRemByPowerOfTwo(HBinaryOperation* instruction);
+  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
+  void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
   MipsAssembler* const assembler_;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 5864660..05834ff 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -27,8 +27,8 @@
 #include "mirror/class-inl.h"
 #include "offsets.h"
 #include "thread.h"
-#include "utils/mips64/assembler_mips64.h"
 #include "utils/assembler.h"
+#include "utils/mips64/assembler_mips64.h"
 #include "utils/stack_checks.h"
 
 namespace art {
@@ -210,7 +210,7 @@
     }
 
     RestoreLiveRegisters(codegen, locations);
-    __ B(GetExitLabel());
+    __ Bc(GetExitLabel());
   }
 
   const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS64"; }
@@ -257,7 +257,7 @@
                                  type);
 
     RestoreLiveRegisters(codegen, locations);
-    __ B(GetExitLabel());
+    __ Bc(GetExitLabel());
   }
 
   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; }
@@ -312,13 +312,13 @@
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
     RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
-      __ B(GetReturnLabel());
+      __ Bc(GetReturnLabel());
     } else {
-      __ B(mips64_codegen->GetLabelOf(successor_));
+      __ Bc(mips64_codegen->GetLabelOf(successor_));
     }
   }
 
-  Label* GetReturnLabel() {
+  Mips64Label* GetReturnLabel() {
     DCHECK(successor_ == nullptr);
     return &return_label_;
   }
@@ -331,7 +331,7 @@
   HBasicBlock* const successor_;
 
   // If `successor_` is null, the label to branch to after the suspend check.
-  Label return_label_;
+  Mips64Label return_label_;
 
   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathMIPS64);
 };
@@ -366,13 +366,11 @@
                                     instruction_,
                                     dex_pc,
                                     this);
+      CheckEntrypointTypes<
+          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
-      CheckEntrypointTypes<kQuickInstanceofNonTrivial,
-                           uint32_t,
-                           const mirror::Class*,
-                           const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this);
@@ -380,7 +378,7 @@
     }
 
     RestoreLiveRegisters(codegen, locations);
-    __ B(GetExitLabel());
+    __ Bc(GetExitLabel());
   }
 
   const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; }
@@ -404,6 +402,7 @@
     uint32_t dex_pc = deoptimize->GetDexPc();
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
     mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; }
@@ -441,6 +440,32 @@
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value()
 
 void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) {
+  // Ensure that we fix up branches.
+  __ FinalizeCode();
+
+  // Adjust native pc offsets in stack maps.
+  for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) {
+    uint32_t old_position = stack_map_stream_.GetStackMap(i).native_pc_offset;
+    uint32_t new_position = __ GetAdjustedPosition(old_position);
+    DCHECK_GE(new_position, old_position);
+    stack_map_stream_.SetStackMapNativePcOffset(i, new_position);
+  }
+
+  // Adjust pc offsets for the disassembly information.
+  if (disasm_info_ != nullptr) {
+    GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval();
+    frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start);
+    frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end);
+    for (auto& it : *disasm_info_->GetInstructionIntervals()) {
+      it.second.start = __ GetAdjustedPosition(it.second.start);
+      it.second.end = __ GetAdjustedPosition(it.second.end);
+    }
+    for (auto& it : *disasm_info_->GetSlowPathIntervals()) {
+      it.code_interval.start = __ GetAdjustedPosition(it.code_interval.start);
+      it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end);
+    }
+  }
+
   CodeGenerator::Finalize(allocator);
 }
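Note: FinalizeCode() may expand short branches into longer instruction
sequences once final distances are known, shifting every subsequent code
offset. The loops above therefore remap all previously recorded native PCs
(stack maps and disassembly intervals) through GetAdjustedPosition(), which by
construction only moves offsets forward, hence the DCHECK_GE. A minimal sketch
of the invariant (illustrative only):

    uint32_t new_pc = __ GetAdjustedPosition(old_pc);  // Same program point, post fix-up.
    DCHECK_GE(new_pc, old_pc);                         // Branch expansion never shrinks code.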
 
@@ -603,6 +628,7 @@
   }
 
   __ Jr(RA);
+  __ Nop();
 
   __ cfi().RestoreState();
   __ cfi().DefCFAOffset(GetFrameSize());
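Note: Jr, like Jalr, still executes a branch delay slot even on R6, and the
explicit Nop() calls added throughout this file keep those slots empty
(presumably the reworked assembler no longer fills them implicitly). The new
compact R6 branches (Bc and the *c variants) have no delay slot and therefore
need no Nop. The pattern (sketch):

    __ Jr(RA);   // The following delay slot still executes.
    __ Nop();    // Keep it harmless.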
@@ -939,7 +965,7 @@
 }
 
 void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) {
-  Label done;
+  Mips64Label done;
   GpuRegister card = AT;
   GpuRegister temp = TMP;
   __ Beqzc(value, &done);
@@ -1048,6 +1074,7 @@
   // TODO: anything related to T9/GP/GOT/PIC/.so's?
   __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
   __ Jalr(T9);
+  __ Nop();
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
@@ -1079,7 +1106,7 @@
     __ Bind(slow_path->GetReturnLabel());
   } else {
     __ Beqzc(TMP, codegen_->GetLabelOf(successor));
-    __ B(slow_path->GetEntryLabel());
+    __ Bc(slow_path->GetEntryLabel());
     // slow_path will return to GetLabelOf(successor).
   }
 }
@@ -1583,6 +1610,7 @@
                                 instruction,
                                 instruction->GetDexPc(),
                                 nullptr);
+        CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
       }
       break;
     }
@@ -1669,12 +1697,7 @@
   // length is limited by the maximum positive signed 32-bit integer.
   // Unsigned comparison of length and index checks for index < 0
   // and for length <= index simultaneously.
-  // Mips R6 requires lhs != rhs for compact branches.
-  if (index == length) {
-    __ B(slow_path->GetEntryLabel());
-  } else {
-    __ Bgeuc(index, length, slow_path->GetEntryLabel());
-  }
+  __ Bgeuc(index, length, slow_path->GetEntryLabel());
 }
 
 void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
@@ -1729,11 +1752,7 @@
 void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) {
   Primitive::Type in_type = compare->InputAt(0)->GetType();
 
-  LocationSummary::CallKind call_kind = Primitive::IsFloatingPointType(in_type)
-      ? LocationSummary::kCall
-      : LocationSummary::kNoCall;
-
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, call_kind);
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
 
   switch (in_type) {
     case Primitive::kPrimLong:
@@ -1743,13 +1762,11 @@
       break;
 
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      InvokeRuntimeCallingConvention calling_convention;
-      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
-      locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
-      locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
-    }
 
     default:
       LOG(FATAL) << "Unexpected type for compare operation " << in_type;
@@ -1758,14 +1775,15 @@
 
 void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) {
   LocationSummary* locations = instruction->GetLocations();
+  GpuRegister res = locations->Out().AsRegister<GpuRegister>();
   Primitive::Type in_type = instruction->InputAt(0)->GetType();
+  bool gt_bias = instruction->IsGtBias();
 
   //  0 if: left == right
   //  1 if: left  > right
   // -1 if: left  < right
   switch (in_type) {
     case Primitive::kPrimLong: {
-      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
       GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
       Location rhs_location = locations->InAt(1);
       bool use_imm = rhs_location.IsConstant();
@@ -1780,22 +1798,52 @@
         rhs = rhs_location.AsRegister<GpuRegister>();
       }
       __ Slt(TMP, lhs, rhs);
-      __ Slt(dst, rhs, lhs);
-      __ Subu(dst, dst, TMP);
+      __ Slt(res, rhs, lhs);
+      __ Subu(res, res, TMP);
       break;
     }
 
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      int32_t entry_point_offset;
-      if (in_type == Primitive::kPrimFloat) {
-        entry_point_offset = instruction->IsGtBias() ? QUICK_ENTRY_POINT(pCmpgFloat)
-                                                     : QUICK_ENTRY_POINT(pCmplFloat);
+    case Primitive::kPrimFloat: {
+      FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+      FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+      Mips64Label done;
+      __ CmpEqS(FTMP, lhs, rhs);
+      __ LoadConst32(res, 0);
+      __ Bc1nez(FTMP, &done);
+      if (gt_bias) {
+        __ CmpLtS(FTMP, lhs, rhs);
+        __ LoadConst32(res, -1);
+        __ Bc1nez(FTMP, &done);
+        __ LoadConst32(res, 1);
       } else {
-        entry_point_offset = instruction->IsGtBias() ? QUICK_ENTRY_POINT(pCmpgDouble)
-                                                     : QUICK_ENTRY_POINT(pCmplDouble);
+        __ CmpLtS(FTMP, rhs, lhs);
+        __ LoadConst32(res, 1);
+        __ Bc1nez(FTMP, &done);
+        __ LoadConst32(res, -1);
       }
-      codegen_->InvokeRuntime(entry_point_offset, instruction, instruction->GetDexPc(), nullptr);
+      __ Bind(&done);
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+      FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+      Mips64Label done;
+      __ CmpEqD(FTMP, lhs, rhs);
+      __ LoadConst32(res, 0);
+      __ Bc1nez(FTMP, &done);
+      if (gt_bias) {
+        __ CmpLtD(FTMP, lhs, rhs);
+        __ LoadConst32(res, -1);
+        __ Bc1nez(FTMP, &done);
+        __ LoadConst32(res, 1);
+      } else {
+        __ CmpLtD(FTMP, rhs, lhs);
+        __ LoadConst32(res, 1);
+        __ Bc1nez(FTMP, &done);
+        __ LoadConst32(res, -1);
+      }
+      __ Bind(&done);
       break;
     }
 
@@ -1804,143 +1852,67 @@
   }
 }
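Note: these inline sequences replace the former pCmpgFloat/pCmplFloat (and
double) runtime calls. The bias value is exactly what NaN yields: every CMP
predicate is false when an operand is NaN, so both Bc1nez branches fall through
and res keeps the last constant written. A NaN trace of the float gt_bias case
(illustrative only):

    // CmpEqS -> false: fall through; res = 0 is overwritten below.
    // CmpLtS -> false: fall through; res = -1 is overwritten below.
    // res = 1, matching the +1 that a gt-bias (cmpg) compare defines for NaN.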
 
-void LocationsBuilderMIPS64::VisitCondition(HCondition* instruction) {
+void LocationsBuilderMIPS64::HandleCondition(HCondition* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  switch (instruction->InputAt(0)->GetType()) {
+    default:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      break;
+  }
   if (instruction->NeedsMaterialization()) {
     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 }
 
-void InstructionCodeGeneratorMIPS64::VisitCondition(HCondition* instruction) {
+void InstructionCodeGeneratorMIPS64::HandleCondition(HCondition* instruction) {
   if (!instruction->NeedsMaterialization()) {
     return;
   }
 
-  // TODO: generalize to long
-  DCHECK_NE(instruction->InputAt(0)->GetType(), Primitive::kPrimLong);
-
+  Primitive::Type type = instruction->InputAt(0)->GetType();
   LocationSummary* locations = instruction->GetLocations();
-
   GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-  GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
-  Location rhs_location = locations->InAt(1);
+  Mips64Label true_label;
 
-  GpuRegister rhs_reg = ZERO;
-  int64_t rhs_imm = 0;
-  bool use_imm = rhs_location.IsConstant();
-  if (use_imm) {
-    rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
-  } else {
-    rhs_reg = rhs_location.AsRegister<GpuRegister>();
-  }
+  switch (type) {
+    default:
+      // Integer case.
+      GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ false, locations);
+      return;
+    case Primitive::kPrimLong:
+      GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ true, locations);
+      return;
 
-  IfCondition if_cond = instruction->GetCondition();
-
-  switch (if_cond) {
-    case kCondEQ:
-    case kCondNE:
-      if (use_imm && IsUint<16>(rhs_imm)) {
-        __ Xori(dst, lhs, rhs_imm);
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Xor(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondEQ) {
-        __ Sltiu(dst, dst, 1);
-      } else {
-        __ Sltu(dst, ZERO, dst);
-      }
-      break;
-
-    case kCondLT:
-    case kCondGE:
-      if (use_imm && IsInt<16>(rhs_imm)) {
-        __ Slti(dst, lhs, rhs_imm);
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Slt(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondGE) {
-        // Simulate lhs >= rhs via !(lhs < rhs) since there's
-        // only the slt instruction but no sge.
-        __ Xori(dst, dst, 1);
-      }
-      break;
-
-    case kCondLE:
-    case kCondGT:
-      if (use_imm && IsInt<16>(rhs_imm + 1)) {
-        // Simulate lhs <= rhs via lhs < rhs + 1.
-        __ Slti(dst, lhs, rhs_imm + 1);
-        if (if_cond == kCondGT) {
-          // Simulate lhs > rhs via !(lhs <= rhs) since there's
-          // only the slti instruction but no sgti.
-          __ Xori(dst, dst, 1);
-        }
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Slt(dst, rhs_reg, lhs);
-        if (if_cond == kCondLE) {
-          // Simulate lhs <= rhs via !(rhs < lhs) since there's
-          // only the slt instruction but no sle.
-          __ Xori(dst, dst, 1);
-        }
-      }
-      break;
-
-    case kCondB:
-    case kCondAE:
-      if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7fff) {
-        __ Sltiu(dst, lhs, rhs_imm);
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Sltu(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondAE) {
-        // Simulate lhs >= rhs via !(lhs < rhs) since there's
-        // only the sltu instruction but no sgeu.
-        __ Xori(dst, dst, 1);
-      }
-      break;
-
-    case kCondBE:
-    case kCondA:
-      if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7ffe) {
-        // Simulate lhs <= rhs via lhs < rhs + 1.
-        __ Sltiu(dst, lhs, rhs_imm + 1);
-        if (if_cond == kCondA) {
-          // Simulate lhs > rhs via !(lhs <= rhs) since there's
-          // only the sltiu instruction but no sgtiu.
-          __ Xori(dst, dst, 1);
-        }
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Sltu(dst, rhs_reg, lhs);
-        if (if_cond == kCondBE) {
-          // Simulate lhs <= rhs via !(rhs < lhs) since there's
-          // only the sltu instruction but no sleu.
-          __ Xori(dst, dst, 1);
-        }
-      }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      // TODO: don't use branches.
+      GenerateFpCompareAndBranch(instruction->GetCondition(),
+                                 instruction->IsGtBias(),
+                                 type,
+                                 locations,
+                                 &true_label);
       break;
   }
+
+  // Convert the branches into the result.
+  Mips64Label done;
+
+  // False case: result = 0.
+  __ LoadConst32(dst, 0);
+  __ Bc(&done);
+
+  // True case: result = 1.
+  __ Bind(&true_label);
+  __ LoadConst32(dst, 1);
+  __ Bind(&done);
 }
 
 void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
@@ -2264,7 +2236,7 @@
   if (value.IsConstant()) {
     int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant());
     if (divisor == 0) {
-      __ B(slow_path->GetEntryLabel());
+      __ Bc(slow_path->GetEntryLabel());
     } else {
       // A division by a non-zero constant is valid. We don't need to perform
       // any check, so simply fall through.
@@ -2316,7 +2288,7 @@
     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
   }
   if (!codegen_->GoesToNextBlock(block, successor)) {
-    __ B(codegen_->GetLabelOf(successor));
+    __ Bc(codegen_->GetLabelOf(successor));
   }
 }
 
@@ -2339,10 +2311,333 @@
   }
 }
 
+void InstructionCodeGeneratorMIPS64::GenerateIntLongCompare(IfCondition cond,
+                                                            bool is64bit,
+                                                            LocationSummary* locations) {
+  GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+  Location rhs_location = locations->InAt(1);
+  GpuRegister rhs_reg = ZERO;
+  int64_t rhs_imm = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    if (is64bit) {
+      rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant());
+    } else {
+      rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+    }
+  } else {
+    rhs_reg = rhs_location.AsRegister<GpuRegister>();
+  }
+  int64_t rhs_imm_plus_one = rhs_imm + UINT64_C(1);
+
+  switch (cond) {
+    case kCondEQ:
+    case kCondNE:
+      if (use_imm && IsUint<16>(rhs_imm)) {
+        __ Xori(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst64(rhs_reg, rhs_imm);
+        }
+        __ Xor(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondEQ) {
+        __ Sltiu(dst, dst, 1);
+      } else {
+        __ Sltu(dst, ZERO, dst);
+      }
+      break;
+
+    case kCondLT:
+    case kCondGE:
+      if (use_imm && IsInt<16>(rhs_imm)) {
+        __ Slti(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst64(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondGE) {
+        // Simulate lhs >= rhs via !(lhs < rhs) since there's
+        // only the slt instruction but no sge.
+        __ Xori(dst, dst, 1);
+      }
+      break;
+
+    case kCondLE:
+    case kCondGT:
+      if (use_imm && IsInt<16>(rhs_imm_plus_one)) {
+        // Simulate lhs <= rhs via lhs < rhs + 1.
+        __ Slti(dst, lhs, rhs_imm_plus_one);
+        if (cond == kCondGT) {
+          // Simulate lhs > rhs via !(lhs <= rhs) since there's
+          // only the slti instruction but no sgti.
+          __ Xori(dst, dst, 1);
+        }
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst64(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, rhs_reg, lhs);
+        if (cond == kCondLE) {
+          // Simulate lhs <= rhs via !(rhs < lhs) since there's
+          // only the slt instruction but no sle.
+          __ Xori(dst, dst, 1);
+        }
+      }
+      break;
+
+    case kCondB:
+    case kCondAE:
+      if (use_imm && IsInt<16>(rhs_imm)) {
+        // Sltiu sign-extends its 16-bit immediate operand before
+        // the comparison and thus lets us compare directly with
+        // unsigned values in the ranges [0, 0x7fff] and
+        // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff].
+        __ Sltiu(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst64(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondAE) {
+        // Simulate lhs >= rhs via !(lhs < rhs) since there's
+        // only the sltu instruction but no sgeu.
+        __ Xori(dst, dst, 1);
+      }
+      break;
+
+    case kCondBE:
+    case kCondA:
+      if (use_imm && (rhs_imm_plus_one != 0) && IsInt<16>(rhs_imm_plus_one)) {
+        // Simulate lhs <= rhs via lhs < rhs + 1.
+        // Note that this only works if rhs + 1 does not overflow
+        // to 0, hence the check above.
+        // Sltiu sign-extends its 16-bit immediate operand before
+        // the comparison and thus lets us compare directly with
+        // unsigned values in the ranges [0, 0x7fff] and
+        // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff].
+        __ Sltiu(dst, lhs, rhs_imm_plus_one);
+        if (cond == kCondA) {
+          // Simulate lhs > rhs via !(lhs <= rhs) since there's
+          // only the sltiu instruction but no sgtiu.
+          __ Xori(dst, dst, 1);
+        }
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst64(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, rhs_reg, lhs);
+        if (cond == kCondBE) {
+          // Simulate lhs <= rhs via !(rhs < lhs) since there's
+          // only the sltu instruction but no sleu.
+          __ Xori(dst, dst, 1);
+        }
+      }
+      break;
+  }
+}
+
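Note: the immediate cases above fold the comparison into a single slti/sltiu,
plus an xori to negate, whenever the constant (or constant + 1) fits in a
signed 16-bit immediate. Illustrative examples of what the switch emits for a
materialized integer condition:

    // lhs <= 5 (kCondLE):  __ Slti(dst, lhs, 6);     // lhs < 6
    // lhs >  5 (kCondGT):  __ Slti(dst, lhs, 6);
    //                      __ Xori(dst, dst, 1);     // !(lhs <= 5)
    // lhs != 7 (kCondNE):  __ Xori(dst, lhs, 7);     // zero iff equal
    //                      __ Sltu(dst, ZERO, dst);  // 1 if non-zero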
+void InstructionCodeGeneratorMIPS64::GenerateIntLongCompareAndBranch(IfCondition cond,
+                                                                     bool is64bit,
+                                                                     LocationSummary* locations,
+                                                                     Mips64Label* label) {
+  GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+  Location rhs_location = locations->InAt(1);
+  GpuRegister rhs_reg = ZERO;
+  int64_t rhs_imm = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    if (is64bit) {
+      rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant());
+    } else {
+      rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+    }
+  } else {
+    rhs_reg = rhs_location.AsRegister<GpuRegister>();
+  }
+
+  if (use_imm && rhs_imm == 0) {
+    switch (cond) {
+      case kCondEQ:
+      case kCondBE:  // <= 0 if zero
+        __ Beqzc(lhs, label);
+        break;
+      case kCondNE:
+      case kCondA:  // > 0 if non-zero
+        __ Bnezc(lhs, label);
+        break;
+      case kCondLT:
+        __ Bltzc(lhs, label);
+        break;
+      case kCondGE:
+        __ Bgezc(lhs, label);
+        break;
+      case kCondLE:
+        __ Blezc(lhs, label);
+        break;
+      case kCondGT:
+        __ Bgtzc(lhs, label);
+        break;
+      case kCondB:  // always false
+        break;
+      case kCondAE:  // always true
+        __ Bc(label);
+        break;
+    }
+  } else {
+    if (use_imm) {
+      rhs_reg = TMP;
+      __ LoadConst64(rhs_reg, rhs_imm);
+    }
+    switch (cond) {
+      case kCondEQ:
+        __ Beqc(lhs, rhs_reg, label);
+        break;
+      case kCondNE:
+        __ Bnec(lhs, rhs_reg, label);
+        break;
+      case kCondLT:
+        __ Bltc(lhs, rhs_reg, label);
+        break;
+      case kCondGE:
+        __ Bgec(lhs, rhs_reg, label);
+        break;
+      case kCondLE:
+        __ Bgec(rhs_reg, lhs, label);
+        break;
+      case kCondGT:
+        __ Bltc(rhs_reg, lhs, label);
+        break;
+      case kCondB:
+        __ Bltuc(lhs, rhs_reg, label);
+        break;
+      case kCondAE:
+        __ Bgeuc(lhs, rhs_reg, label);
+        break;
+      case kCondBE:
+        __ Bgeuc(rhs_reg, lhs, label);
+        break;
+      case kCondA:
+        __ Bltuc(rhs_reg, lhs, label);
+        break;
+    }
+  }
+}
+
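Note: when the right-hand side is the constant 0, the dedicated zero-compare
compact branches avoid materializing the constant in TMP. kCondB against zero
can never hold (no unsigned value is below 0), so nothing is emitted, while
kCondAE always holds and degenerates to an unconditional Bc. Illustrative
examples:

    // x >= 0, signed:    __ Bgezc(lhs, label);
    // x <  0, unsigned:  nothing emitted; the branch is never taken.
    // x >= 0, unsigned:  __ Bc(label);  // always taken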
+void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond,
+                                                                bool gt_bias,
+                                                                Primitive::Type type,
+                                                                LocationSummary* locations,
+                                                                Mips64Label* label) {
+  FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+  if (type == Primitive::kPrimFloat) {
+    switch (cond) {
+      case kCondEQ:
+        __ CmpEqS(FTMP, lhs, rhs);
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondNE:
+        __ CmpEqS(FTMP, lhs, rhs);
+        __ Bc1eqz(FTMP, label);
+        break;
+      case kCondLT:
+        if (gt_bias) {
+          __ CmpLtS(FTMP, lhs, rhs);
+        } else {
+          __ CmpUltS(FTMP, lhs, rhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondLE:
+        if (gt_bias) {
+          __ CmpLeS(FTMP, lhs, rhs);
+        } else {
+          __ CmpUleS(FTMP, lhs, rhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondGT:
+        if (gt_bias) {
+          __ CmpUltS(FTMP, rhs, lhs);
+        } else {
+          __ CmpLtS(FTMP, rhs, lhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondGE:
+        if (gt_bias) {
+          __ CmpUleS(FTMP, rhs, lhs);
+        } else {
+          __ CmpLeS(FTMP, rhs, lhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      default:
+        LOG(FATAL) << "Unexpected non-floating-point condition";
+    }
+  } else {
+    DCHECK_EQ(type, Primitive::kPrimDouble);
+    switch (cond) {
+      case kCondEQ:
+        __ CmpEqD(FTMP, lhs, rhs);
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondNE:
+        __ CmpEqD(FTMP, lhs, rhs);
+        __ Bc1eqz(FTMP, label);
+        break;
+      case kCondLT:
+        if (gt_bias) {
+          __ CmpLtD(FTMP, lhs, rhs);
+        } else {
+          __ CmpUltD(FTMP, lhs, rhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondLE:
+        if (gt_bias) {
+          __ CmpLeD(FTMP, lhs, rhs);
+        } else {
+          __ CmpUleD(FTMP, lhs, rhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondGT:
+        if (gt_bias) {
+          __ CmpUltD(FTMP, rhs, lhs);
+        } else {
+          __ CmpLtD(FTMP, rhs, lhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondGE:
+        if (gt_bias) {
+          __ CmpUleD(FTMP, rhs, lhs);
+        } else {
+          __ CmpLeD(FTMP, rhs, lhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      default:
+        LOG(FATAL) << "Unexpected non-floating-point condition";
+    }
+  }
+}
+
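Note: unlike the R2-style c.cond.fmt/bc1t pairing used by the MIPS32 code
earlier, which goes through a condition-code flag, the R6 CMP.cond.fmt
instructions write an all-ones or all-zeros mask into an FPU register (FTMP
here), and Bc1nez/Bc1eqz branch on that register. The gt_bias handling mirrors
the MIPS32 version, picking ordered or unordered predicates so that NaN lands
on the biased side. The basic pattern (sketch):

    __ CmpLtD(FTMP, lhs, rhs);  // FTMP = all ones if lhs < rhs (ordered), else zero.
    __ Bc1nez(FTMP, label);     // Branch if the predicate held.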
 void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction,
                                                            size_t condition_input_index,
-                                                           Label* true_target,
-                                                           Label* false_target) {
+                                                           Mips64Label* true_target,
+                                                           Mips64Label* false_target) {
   HInstruction* cond = instruction->InputAt(condition_input_index);
 
   if (true_target == nullptr && false_target == nullptr) {
@@ -2352,12 +2647,12 @@
     // Constant condition, statically compared against 1.
     if (cond->AsIntConstant()->IsOne()) {
       if (true_target != nullptr) {
-        __ B(true_target);
+        __ Bc(true_target);
       }
     } else {
       DCHECK(cond->AsIntConstant()->IsZero());
       if (false_target != nullptr) {
-        __ B(false_target);
+        __ Bc(false_target);
       }
     }
     return;
@@ -2384,127 +2679,34 @@
     // The condition instruction has not been materialized, use its inputs as
     // the comparison and its condition as the branch condition.
     HCondition* condition = cond->AsCondition();
+    Primitive::Type type = condition->InputAt(0)->GetType();
+    LocationSummary* locations = cond->GetLocations();
+    IfCondition if_cond = condition->GetCondition();
+    Mips64Label* branch_target = true_target;
 
-    GpuRegister lhs = condition->GetLocations()->InAt(0).AsRegister<GpuRegister>();
-    Location rhs_location = condition->GetLocations()->InAt(1);
-    GpuRegister rhs_reg = ZERO;
-    int32_t rhs_imm = 0;
-    bool use_imm = rhs_location.IsConstant();
-    if (use_imm) {
-      rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
-    } else {
-      rhs_reg = rhs_location.AsRegister<GpuRegister>();
-    }
-
-    IfCondition if_cond;
-    Label* non_fallthrough_target;
     if (true_target == nullptr) {
       if_cond = condition->GetOppositeCondition();
-      non_fallthrough_target = false_target;
-    } else {
-      if_cond = condition->GetCondition();
-      non_fallthrough_target = true_target;
+      branch_target = false_target;
     }
 
-    if (use_imm && rhs_imm == 0) {
-      switch (if_cond) {
-        case kCondEQ:
-          __ Beqzc(lhs, non_fallthrough_target);
-          break;
-        case kCondNE:
-          __ Bnezc(lhs, non_fallthrough_target);
-          break;
-        case kCondLT:
-          __ Bltzc(lhs, non_fallthrough_target);
-          break;
-        case kCondGE:
-          __ Bgezc(lhs, non_fallthrough_target);
-          break;
-        case kCondLE:
-          __ Blezc(lhs, non_fallthrough_target);
-          break;
-        case kCondGT:
-          __ Bgtzc(lhs, non_fallthrough_target);
-          break;
-        case kCondB:
-          break;  // always false
-        case kCondBE:
-          __ Beqzc(lhs, non_fallthrough_target);  // <= 0 if zero
-          break;
-        case kCondA:
-          __ Bnezc(lhs, non_fallthrough_target);  // > 0 if non-zero
-          break;
-        case kCondAE:
-          __ B(non_fallthrough_target);  // always true
-          break;
-      }
-    } else {
-      if (use_imm) {
-        rhs_reg = TMP;
-        __ LoadConst32(rhs_reg, rhs_imm);
-      }
-      // It looks like we can get here with lhs == rhs. Should that be possible at all?
-      // Mips R6 requires lhs != rhs for compact branches.
-      if (lhs == rhs_reg) {
-        DCHECK(!use_imm);
-        switch (if_cond) {
-          case kCondEQ:
-          case kCondGE:
-          case kCondLE:
-          case kCondBE:
-          case kCondAE:
-            // if lhs == rhs for a positive condition, then it is a branch
-            __ B(non_fallthrough_target);
-            break;
-          case kCondNE:
-          case kCondLT:
-          case kCondGT:
-          case kCondB:
-          case kCondA:
-            // if lhs == rhs for a negative condition, then it is a NOP
-            break;
-        }
-      } else {
-        switch (if_cond) {
-          case kCondEQ:
-            __ Beqc(lhs, rhs_reg, non_fallthrough_target);
-            break;
-          case kCondNE:
-            __ Bnec(lhs, rhs_reg, non_fallthrough_target);
-            break;
-          case kCondLT:
-            __ Bltc(lhs, rhs_reg, non_fallthrough_target);
-            break;
-          case kCondGE:
-            __ Bgec(lhs, rhs_reg, non_fallthrough_target);
-            break;
-          case kCondLE:
-            __ Bgec(rhs_reg, lhs, non_fallthrough_target);
-            break;
-          case kCondGT:
-            __ Bltc(rhs_reg, lhs, non_fallthrough_target);
-            break;
-          case kCondB:
-            __ Bltuc(lhs, rhs_reg, non_fallthrough_target);
-            break;
-          case kCondAE:
-            __ Bgeuc(lhs, rhs_reg, non_fallthrough_target);
-            break;
-          case kCondBE:
-            __ Bgeuc(rhs_reg, lhs, non_fallthrough_target);
-            break;
-          case kCondA:
-            __ Bltuc(rhs_reg, lhs, non_fallthrough_target);
-            break;
-        }
-      }
+    switch (type) {
+      default:
+        GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ false, locations, branch_target);
+        break;
+      case Primitive::kPrimLong:
+        GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ true, locations, branch_target);
+        break;
+      case Primitive::kPrimFloat:
+      case Primitive::kPrimDouble:
+        GenerateFpCompareAndBranch(if_cond, condition->IsGtBias(), type, locations, branch_target);
+        break;
     }
   }
 
   // If neither branch falls through (case 3), the conditional branch to `true_target`
   // was already emitted (case 2) and we need to emit a jump to `false_target`.
   if (true_target != nullptr && false_target != nullptr) {
-    __ B(false_target);
+    __ Bc(false_target);
   }
 }
 
@@ -2518,9 +2720,9 @@
 void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) {
   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
-  Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+  Mips64Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
       nullptr : codegen_->GetLabelOf(true_successor);
-  Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+  Mips64Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
       nullptr : codegen_->GetLabelOf(false_successor);
   GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
 }
@@ -2543,6 +2745,14 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
 void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction,
                                             const FieldInfo& field_info ATTRIBUTE_UNUSED) {
   LocationSummary* locations =
@@ -2695,7 +2905,7 @@
   GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
 
-  Label done;
+  Mips64Label done;
 
   // Return 0 if `obj` is null.
   // TODO: Avoid this check if we know `obj` is not null.
@@ -2790,6 +3000,7 @@
   __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value());
   // T9();
   __ Jalr(T9);
+  __ Nop();
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
@@ -2822,9 +3033,9 @@
   // sorted out.
   if (invoke->HasCurrentMethodInput()) {
     LocationSummary* locations = invoke->GetLocations();
-    Location location = locations->InAt(invoke->GetCurrentMethodInputIndex());
+    Location location = locations->InAt(invoke->GetSpecialInputIndex());
     if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-      locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation());
+      locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
     }
   }
 }
@@ -2882,7 +3093,7 @@
                         invoke->GetStringInitOffset());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadConst64(temp.AsRegister<GpuRegister>(), invoke->GetMethodAddress());
@@ -2894,7 +3105,7 @@
       LOG(FATAL) << "Unsupported";
       UNREACHABLE();
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       GpuRegister reg = temp.AsRegister<GpuRegister>();
       GpuRegister method_reg;
       if (current_method.IsRegister()) {
@@ -2924,13 +3135,14 @@
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
-      __ Jalr(&frame_entry_label_, T9);
+      __ Jialc(&frame_entry_label_, T9);
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
       // LR = invoke->GetDirectCodePtr();
       __ LoadConst64(T9, invoke->GetDirectCodePtr());
       // LR()
       __ Jalr(T9);
+      __ Nop();
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
@@ -2947,6 +3159,7 @@
                             kMips64WordSize).Int32Value());
       // T9()
       __ Jalr(T9);
+      __ Nop();
       break;
   }
   DCHECK(!IsLeafMethod());
@@ -2970,8 +3183,13 @@
 }
 
 void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  GpuRegister receiver = calling_convention.GetRegisterAt(0);
+
   GpuRegister temp = temp_location.AsRegister<GpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kMips64PointerSize).SizeValue();
@@ -2979,8 +3197,7 @@
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
 
   // temp = object->GetClass();
-  DCHECK(receiver.IsRegister());
-  __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
+  __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset);
   MaybeRecordImplicitNullCheck(invoke);
   // temp = temp->GetMethodAt(method_offset);
   __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
@@ -2988,6 +3205,7 @@
   __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value());
   // T9();
   __ Jalr(T9);
+  __ Nop();
 }
 
 void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
@@ -3016,6 +3234,7 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
@@ -3027,22 +3246,27 @@
     __ LoadFromOffset(kLoadUnsignedWord, out, current_method,
                       ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
-    DCHECK(cls->CanCallRuntime());
     __ LoadFromOffset(kLoadDoubleword, out, current_method,
                       ArtMethod::DexCacheResolvedTypesOffset(kMips64PointerSize).Int32Value());
-    __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+    __ LoadFromOffset(
+        kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
     // TODO: We will need a read barrier here.
-    SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64(
-        cls,
-        cls,
-        cls->GetDexPc(),
-        cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ Beqzc(out, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
-    } else {
-      __ Bind(slow_path->GetExitLabel());
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64(
+          cls,
+          cls,
+          cls->GetDexPc(),
+          cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+      if (!cls->IsInDexCache()) {
+        __ Beqzc(out, slow_path->GetEntryLabel());
+      }
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
@@ -3079,26 +3303,31 @@
 }
 
 void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = load->IsInDexCache()
+      ? LocationSummary::kNoCall
+      : LocationSummary::kCallOnSlowPath;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) {
-  SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
-  codegen_->AddSlowPath(slow_path);
-
   LocationSummary* locations = load->GetLocations();
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
   GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>();
   __ LoadFromOffset(kLoadUnsignedWord, out, current_method,
                     ArtMethod::DeclaringClassOffset().Int32Value());
   __ LoadFromOffset(kLoadDoubleword, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
-  __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
+  __ LoadFromOffset(
+      kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
   // TODO: We will need a read barrier here.
-  __ Beqzc(out, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
+
+  if (!load->IsInDexCache()) {
+    SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
+    codegen_->AddSlowPath(slow_path);
+    __ Beqzc(out, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
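
Conceptually, the fast path emitted here is three dependent loads plus an optional null check into the slow path; a hedged plain-C++ sketch with hypothetical types (offsets and GC roots elided, not ART code):

    #include <cstdint>

    struct Klass { void** dex_cache_strings; };   // stand-in for mirror::Class
    struct Method { Klass* declaring_class; };    // stand-in for ArtMethod

    void* LoadStringFast(Method* current_method, uint32_t string_index,
                         bool is_in_dex_cache) {
      Klass* k = current_method->declaring_class;   // LoadFromOffset #1
      void** strings = k->dex_cache_strings;        // LoadFromOffset #2
      void* str = strings[string_index];            // LoadFromOffset #3
      if (!is_in_dex_cache && str == nullptr) {
        // Beqzc into LoadStringSlowPathMIPS64: resolve via the runtime,
        // then rejoin at the slow path's exit label.
      }
      return str;
    }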
 
 void LocationsBuilderMIPS64::VisitLocal(HLocal* local) {
@@ -3132,7 +3361,11 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
-  CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitMul(HMul* mul) {
@@ -3266,15 +3499,12 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  // Move an uint16_t value to a register.
-  __ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(), instruction->GetTypeIndex());
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
@@ -3454,6 +3684,11 @@
       int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
                                                              : QUICK_ENTRY_POINT(pFmod);
       codegen_->InvokeRuntime(entry_offset, instruction, instruction->GetDexPc(), nullptr);
+      if (type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickFmodf, float, float, float>();
+      } else {
+        CheckEntrypointTypes<kQuickFmod, double, double, double>();
+      }
       break;
     }
     default:
@@ -3487,6 +3722,16 @@
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
 void LocationsBuilderMIPS64::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
@@ -3763,6 +4008,11 @@
                               conversion,
                               conversion->GetDexPc(),
                               nullptr);
+      if (result_type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+      } else {
+        CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+      }
     }
   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
     CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
@@ -3778,6 +4028,19 @@
                             conversion,
                             conversion->GetDexPc(),
                             nullptr);
+    if (result_type != Primitive::kPrimLong) {
+      if (input_type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickF2iz, int32_t, float>();
+      } else {
+        CheckEntrypointTypes<kQuickD2iz, int32_t, double>();
+      }
+    } else {
+      if (input_type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+      } else {
+        CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+      }
+    }
   } else if (Primitive::IsFloatingPointType(result_type) &&
              Primitive::IsFloatingPointType(input_type)) {
     FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
@@ -3820,83 +4083,83 @@
 }
 
 void LocationsBuilderMIPS64::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitFakeString(HFakeString* instruction) {
@@ -3925,22 +4188,39 @@
   GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  // Create a series of compare/jumps.
+  // Create a set of compare/jumps.
+  GpuRegister temp_reg = TMP;
+  if (IsInt<16>(-lower_bound)) {
+    __ Addiu(temp_reg, value_reg, -lower_bound);
+  } else {
+    __ LoadConst32(AT, -lower_bound);
+    __ Addu(temp_reg, value_reg, AT);
+  }
+  // Jump to default if index is negative.
+  // Note: We don't check the case where index is positive but value < lower_bound, because in
+  // that case index >= num_entries must hold, so we save one branch instruction.
+  __ Bltzc(temp_reg, codegen_->GetLabelOf(default_block));
+
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int32_t i = 0; i < num_entries; i++) {
-    int32_t case_value = lower_bound + i;
-    Label* succ = codegen_->GetLabelOf(successors[i]);
-    if (case_value == 0) {
-      __ Beqzc(value_reg, succ);
-    } else {
-      __ LoadConst32(TMP, case_value);
-      __ Beqc(value_reg, TMP, succ);
-    }
+  // Jump to successors[0] if value == lower_bound.
+  __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[0]));
+  int32_t last_index = 0;
+  for (; num_entries - last_index > 2; last_index += 2) {
+    __ Addiu(temp_reg, temp_reg, -2);
+    // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+    __ Bltzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+    // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+    __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
+  }
+  if (num_entries - last_index == 2) {
+    // The last missing case_value.
+    __ Addiu(temp_reg, temp_reg, -1);
+    __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
   }
 
   // And the default for any other value.
   if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
-    __ B(codegen_->GetLabelOf(default_block));
+    __ Bc(codegen_->GetLabelOf(default_block));
   }
 }
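
For reference, a hedged C++ sketch of the compare/branch chain this new PackedSwitch lowering emits (hypothetical helper, not ART code): the value is first biased by -lower_bound so one signed check rejects everything below the range, and each loop step then retires two cases with a single Addiu plus two compact branches.

    #include <cstdint>

    // Returns the successor index taken, or -1 for the default block.
    int PackedSwitchSketch(int32_t value, int32_t lower_bound, int32_t num_entries) {
      int32_t temp = value - lower_bound;        // Addiu/Addu with -lower_bound
      if (temp < 0) return -1;                   // Bltzc to the default block
      if (temp == 0) return 0;                   // Beqzc to successors[0]
      int32_t last_index = 0;
      for (; num_entries - last_index > 2; last_index += 2) {
        temp -= 2;                               // Addiu temp_reg, temp_reg, -2
        if (temp < 0) return last_index + 1;     // Bltzc to successors[last_index + 1]
        if (temp == 0) return last_index + 2;    // Beqzc to successors[last_index + 2]
      }
      if (num_entries - last_index == 2) {
        temp -= 1;                               // the last missing case_value
        if (temp == 0) return last_index + 1;    // Beqzc to successors[last_index + 1]
      }
      return -1;                                 // Bc to the default block
    }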
 
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index a078dd1..60ff96d 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -158,12 +158,12 @@
  public:
   SlowPathCodeMIPS64() : entry_label_(), exit_label_() {}
 
-  Label* GetEntryLabel() { return &entry_label_; }
-  Label* GetExitLabel() { return &exit_label_; }
+  Mips64Label* GetEntryLabel() { return &entry_label_; }
+  Mips64Label* GetExitLabel() { return &exit_label_; }
 
  private:
-  Label entry_label_;
-  Label exit_label_;
+  Mips64Label entry_label_;
+  Mips64Label exit_label_;
 
   DISALLOW_COPY_AND_ASSIGN(SlowPathCodeMIPS64);
 };
@@ -189,6 +189,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -224,6 +225,7 @@
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -231,12 +233,22 @@
   void GenerateExplicitNullCheck(HNullCheck* instruction);
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
-                             Label* true_target,
-                             Label* false_target);
+                             Mips64Label* true_target,
+                             Mips64Label* false_target);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
   void DivRemByPowerOfTwo(HBinaryOperation* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
+  void GenerateIntLongCompare(IfCondition cond, bool is64bit, LocationSummary* locations);
+  void GenerateIntLongCompareAndBranch(IfCondition cond,
+                                       bool is64bit,
+                                       LocationSummary* locations,
+                                       Mips64Label* label);
+  void GenerateFpCompareAndBranch(IfCondition cond,
+                                  bool gt_bias,
+                                  Primitive::Type type,
+                                  LocationSummary* locations,
+                                  Mips64Label* label);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
   Mips64Assembler* const assembler_;
@@ -265,7 +277,7 @@
   size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64WordSize; }
 
   uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
-    return GetLabelOf(block)->Position();
+    return assembler_.GetLabelLocation(GetLabelOf(block));
   }
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
@@ -298,12 +310,12 @@
     return isa_features_;
   }
 
-  Label* GetLabelOf(HBasicBlock* block) const {
-    return CommonGetLabelOf<Label>(block_labels_, block);
+  Mips64Label* GetLabelOf(HBasicBlock* block) const {
+    return CommonGetLabelOf<Mips64Label>(block_labels_, block);
   }
 
   void Initialize() OVERRIDE {
-    block_labels_ = CommonInitializeLabels<Label>();
+    block_labels_ = CommonInitializeLabels<Mips64Label>();
   }
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
@@ -349,8 +361,8 @@
 
  private:
   // Labels for each block that will be compiled.
-  Label* block_labels_;  // Indexed by block id.
-  Label frame_entry_label_;
+  Mips64Label* block_labels_;  // Indexed by block id.
+  Mips64Label frame_entry_label_;
   LocationsBuilderMIPS64 location_builder_;
   InstructionCodeGeneratorMIPS64 instruction_visitor_;
   ParallelMoveResolverMIPS64 move_resolver_;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 999306c..fd18917 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -42,7 +42,6 @@
 
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = EAX;
-
 static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
 
 static constexpr int kC2ConditionMask = 0x400;
@@ -67,6 +66,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -93,6 +93,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -152,6 +153,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -177,6 +179,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
     RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
@@ -222,6 +225,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
     x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
     RestoreLiveRegisters(codegen, locations);
 
@@ -257,6 +261,11 @@
     x86_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
                                           : QUICK_ENTRY_POINT(pInitializeType),
                                at_, dex_pc_, this);
+    if (do_clinit_) {
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+    } else {
+      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+    }
 
     // Move the class to the desired location.
     Location out = locations->Out();
@@ -368,6 +377,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }
@@ -410,6 +420,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
     RestoreLiveRegisters(codegen, locations);
     __ jmp(GetExitLabel());
   }
@@ -422,6 +433,56 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
 };
 
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
+ public:
+  ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location out, Location obj)
+      : instruction_(instruction), out_(out), obj_(obj) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast())
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+    x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
+    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
+    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location obj_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
+};
+
 // Slow path generating a read barrier for a heap reference.
 class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
  public:
@@ -443,7 +504,7 @@
     // to be instrumented, e.g.:
     //
     //   __ movl(out, Address(out, offset));
-    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
     //
     // In that case, we have lost the information about the original
     // object, and the emitted read barrier cannot work properly.
@@ -459,7 +520,9 @@
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
     DCHECK(!instruction_->IsInvoke() ||
            (instruction_->IsInvokeStaticOrDirect() &&
-            instruction_->GetLocations()->Intrinsified()));
+            instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -601,14 +664,18 @@
 class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
  public:
   ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
-      : instruction_(instruction), out_(out), root_(root) {}
+      : instruction_(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     Register reg_out = out_.AsRegister<Register>();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
-    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -1487,7 +1554,7 @@
 
     Location lhs = condition->GetLocations()->InAt(0);
     Location rhs = condition->GetLocations()->InAt(1);
-    // LHS is guaranteed to be in a register (see LocationsBuilderX86::VisitCondition).
+    // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
     if (rhs.IsRegister()) {
       __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
     } else if (rhs.IsConstant()) {
@@ -1549,6 +1616,14 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
 void LocationsBuilderX86::VisitLocal(HLocal* local) {
   local->SetLocations(nullptr);
 }
@@ -1592,7 +1667,7 @@
 void InstructionCodeGeneratorX86::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
 }
 
-void LocationsBuilderX86::VisitCondition(HCondition* cond) {
+void LocationsBuilderX86::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
   // Handle the long/FP comparisons made in instruction simplification.
@@ -1625,7 +1700,7 @@
   }
 }
 
-void InstructionCodeGeneratorX86::VisitCondition(HCondition* cond) {
+void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
   if (!cond->NeedsMaterialization()) {
     return;
   }
@@ -1686,83 +1761,83 @@
 }
 
 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
@@ -1820,7 +1895,7 @@
 }
 
 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
-  GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
 }
 
 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
@@ -1908,7 +1983,7 @@
   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
     if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
-      invoke->GetLocations()->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::Any());
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
     }
     return;
   }
@@ -1917,8 +1992,7 @@
 
   // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
   if (invoke->HasPcRelativeDexCache()) {
-    invoke->GetLocations()->SetInAt(invoke->GetCurrentMethodInputIndex(),
-                                    Location::RequiresRegister());
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
   }
 
   if (codegen_->IsBaseline()) {
@@ -1926,9 +2000,9 @@
     // needs a register. We therefore do not require a register for it, and let
     // the code generation of the invoke handle it.
     LocationSummary* locations = invoke->GetLocations();
-    Location location = locations->InAt(invoke->GetCurrentMethodInputIndex());
+    Location location = locations->InAt(invoke->GetSpecialInputIndex());
     if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-      locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation());
+      locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
     }
   }
 }
@@ -1958,6 +2032,11 @@
 }
 
 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  IntrinsicLocationsBuilderX86 intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
   HandleInvoke(invoke);
 }
 
@@ -2460,6 +2539,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickF2l, int64_t, float>();
           break;
 
         case Primitive::kPrimDouble:
@@ -2468,6 +2548,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickD2l, int64_t, double>();
           break;
 
         default:
@@ -3298,11 +3379,13 @@
                                 instruction,
                                 instruction->GetDexPc(),
                                 nullptr);
+        CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
       } else {
         codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod),
                                 instruction,
                                 instruction->GetDexPc(),
                                 nullptr);
+        CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
       }
       break;
     }
@@ -3740,6 +3823,92 @@
   __ Bind(&done);
 }
 
+void LocationsBuilderX86::VisitRor(HRor* ror) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimLong:
+      // Add the temporary needed.
+      locations->AddTemp(Location::RequiresRegister());
+      FALLTHROUGH_INTENDED;
+    case Primitive::kPrimInt:
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL (unless it is a constant).
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+      UNREACHABLE();
+  }
+}
+
+void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+
+  if (ror->GetResultType() == Primitive::kPrimInt) {
+    Register first_reg = first.AsRegister<Register>();
+    if (second.IsRegister()) {
+      Register second_reg = second.AsRegister<Register>();
+      __ rorl(first_reg, second_reg);
+    } else {
+      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
+      __ rorl(first_reg, imm);
+    }
+    return;
+  }
+
+  DCHECK_EQ(ror->GetResultType(), Primitive::kPrimLong);
+  Register first_reg_lo = first.AsRegisterPairLow<Register>();
+  Register first_reg_hi = first.AsRegisterPairHigh<Register>();
+  Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
+  if (second.IsRegister()) {
+    Register second_reg = second.AsRegister<Register>();
+    DCHECK_EQ(second_reg, ECX);
+    __ movl(temp_reg, first_reg_hi);
+    __ shrd(first_reg_hi, first_reg_lo, second_reg);
+    __ shrd(first_reg_lo, temp_reg, second_reg);
+    __ movl(temp_reg, first_reg_hi);
+    __ testl(second_reg, Immediate(32));
+    __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
+    __ cmovl(kNotEqual, first_reg_lo, temp_reg);
+  } else {
+    int32_t shift_amt =
+      CodeGenerator::GetInt64ValueOf(second.GetConstant()) & kMaxLongShiftValue;
+    if (shift_amt == 0) {
+      // Already fine.
+      return;
+    }
+    if (shift_amt == 32) {
+      // Just swap.
+      __ movl(temp_reg, first_reg_lo);
+      __ movl(first_reg_lo, first_reg_hi);
+      __ movl(first_reg_hi, temp_reg);
+      return;
+    }
+
+    Immediate imm(shift_amt);
+    // Save the contents of the low value.
+    __ movl(temp_reg, first_reg_lo);
+
+    // Shift right into low, feeding bits from high.
+    __ shrd(first_reg_lo, first_reg_hi, imm);
+
+    // Shift right into high, feeding bits from the original low.
+    __ shrd(first_reg_hi, temp_reg, imm);
+
+    // Swap if needed.
+    if (shift_amt > 32) {
+      __ movl(temp_reg, first_reg_lo);
+      __ movl(first_reg_lo, first_reg_hi);
+      __ movl(first_reg_hi, temp_reg);
+    }
+  }
+}
+
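
For the constant-amount path just above, a hedged C++ sketch of the equivalent computation (hypothetical helper; the 5-bit count masking that shrd applies in hardware is made explicit here):

    #include <cstdint>

    // Rotate a 64-bit value right by shift_amt using only 32-bit halves,
    // mirroring the shrd/shrd/swap sequence emitted above.
    uint64_t RotateRight64(uint32_t lo, uint32_t hi, int32_t shift_amt) {
      shift_amt &= 63;                           // kMaxLongShiftValue
      if (shift_amt == 0) {                      // already fine
        return (static_cast<uint64_t>(hi) << 32) | lo;
      }
      if (shift_amt == 32) {                     // just swap the halves
        return (static_cast<uint64_t>(lo) << 32) | hi;
      }
      int32_t s = shift_amt & 31;                // shrd masks imm counts to 5 bits
      uint32_t temp = lo;                        // save the contents of the low value
      uint32_t new_lo = (lo >> s) | (hi << (32 - s));    // shrd lo, hi, imm
      uint32_t new_hi = (hi >> s) | (temp << (32 - s));  // shrd hi, temp, imm
      if (shift_amt > 32) {                      // swap if needed
        uint32_t t = new_lo; new_lo = new_hi; new_hi = t;
      }
      return (static_cast<uint64_t>(new_hi) << 32) | new_lo;
    }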
 void LocationsBuilderX86::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
@@ -3769,19 +3938,18 @@
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   locations->SetOut(Location::RegisterLocation(EAX));
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
 }
 
 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
-  InvokeRuntimeCallingConvention calling_convention;
-  __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex()));
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
   DCHECK(!codegen_->IsLeafMethod());
 }
 
@@ -3798,13 +3966,13 @@
 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
   InvokeRuntimeCallingConvention calling_convention;
   __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex()));
-
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
   DCHECK(!codegen_->IsLeafMethod());
 }
 
@@ -3988,7 +4156,7 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
   /*
   * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need a memory fence.
    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
@@ -3996,7 +4164,7 @@
    */
   switch (kind) {
     case MemBarrierKind::kAnyAny: {
-      __ mfence();
+      MemoryFence();
       break;
     }
     case MemBarrierKind::kAnyStore:
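
A hedged C++ analogue of this mapping (enum values assumed; std::atomic fences stand in for mfence and for the compiler-only barriers the other kinds reduce to on x86):

    #include <atomic>

    enum class BarrierKind { kAnyAny, kAnyStore, kLoadAny, kStoreStore };

    // Only kAnyAny (StoreLoad) needs a real fence on x86; the rest must
    // merely stop the compiler from reordering.
    void MemoryBarrierSketch(BarrierKind kind) {
      if (kind == BarrierKind::kAnyAny) {
        std::atomic_thread_fence(std::memory_order_seq_cst);  // mfence-equivalent
      } else {
        std::atomic_signal_fence(std::memory_order_seq_cst);  // compiler barrier only
      }
    }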
@@ -4032,7 +4200,7 @@
 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
                                                                  Register temp) {
   DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
-  Location location = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
   if (!invoke->GetLocations()->Intrinsified()) {
     return location.AsRegister<Register>();
   }
@@ -4063,7 +4231,7 @@
       __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(invoke->GetStringInitOffset()));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
@@ -4084,7 +4252,7 @@
       break;
     }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register method_reg;
       Register reg = temp.AsRegister<Register>();
       if (current_method.IsRegister()) {
@@ -4136,12 +4304,16 @@
   Register temp = temp_in.AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  DCHECK(receiver.IsRegister());
   // /* HeapReference<Class> */ temp = receiver->klass_
-  __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
+  __ movl(temp, Address(receiver, class_offset));
   MaybeRecordImplicitNullCheck(invoke);
   // Instead of simply (possibly) unpoisoning `temp` here, we should
   // emit a read barrier for the previous class reference load.
@@ -4238,9 +4410,14 @@
 
   if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) {
     // Long values can be loaded atomically into an XMM using movsd.
-    // So we use an XMM register as a temp to achieve atomicity (first load the temp into the XMM
-    // and then copy the XMM into the output 32bits at a time).
+    // So we use an XMM register as a temp to achieve atomicity (first
+    // load the value into the XMM temp and then copy the XMM into the
+    // output, 32 bits at a time).
     locations->AddTemp(Location::RequiresFpuRegister());
+  } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
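
To make the comment above concrete, a hedged SSE2 sketch (hypothetical helper, not ART code) of reading a volatile long through an XMM temp on 32-bit x86:

    #include <emmintrin.h>
    #include <cstdint>

    // One 64-bit load into the XMM temp (movq/movsd is a single atomic
    // access here in practice), then the halves are copied out 32 bits
    // at a time.
    void AtomicLoad64(const int64_t* src, uint32_t* out_lo, uint32_t* out_hi) {
      __m128i v = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(src));
      *out_lo = static_cast<uint32_t>(_mm_cvtsi128_si32(v));
      *out_hi = static_cast<uint32_t>(_mm_cvtsi128_si32(_mm_srli_epi64(v, 32)));
    }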
 
@@ -4278,9 +4455,32 @@
     }
 
     case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
       __ movl(out.AsRegister<Register>(), Address(base, offset));
       break;
+
+    case Primitive::kPrimNot: {
+      // /* HeapReference<Object> */ out = *(base + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+      } else {
+        __ movl(out.AsRegister<Register>(), Address(base, offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+        // If read barriers are enabled, emit read barriers other than
+        // Baker's using a slow path (and also unpoison the loaded
+        // reference, if heap poisoning is enabled).
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+      }
+      break;
     }
 
     case Primitive::kPrimLong: {
@@ -4315,17 +4515,20 @@
       UNREACHABLE();
   }
 
-  // Longs are handled in the switch.
-  if (field_type != Primitive::kPrimLong) {
+  if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimLong) {
+    // Potential implicit null checks, in the case of reference or
+    // long fields, are handled in the previous switch statement.
+  } else {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
-  }
-
-  if (field_type == Primitive::kPrimNot) {
-    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
+    if (field_type == Primitive::kPrimNot) {
+      // Memory barriers, in the case of references, are also handled
+      // in the previous switch statement.
+    } else {
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+    }
   }
 }
 
@@ -4390,7 +4593,7 @@
       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   }
 
   bool maybe_record_implicit_null_check_done = false;
@@ -4495,7 +4698,7 @@
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   }
 }
 
@@ -4676,6 +4879,11 @@
             Location::kOutputOverlap :
             Location::kNoOutputOverlap);
   }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
@@ -4683,12 +4891,13 @@
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
+  Location out_loc = locations->Out();
 
   Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movzxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4700,7 +4909,7 @@
 
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movsxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4712,7 +4921,7 @@
 
     case Primitive::kPrimShort: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movsxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4724,7 +4933,7 @@
 
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movzxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4734,13 +4943,9 @@
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      static_assert(
-          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    case Primitive::kPrimInt: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movl(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4750,20 +4955,56 @@
       break;
     }
 
+    case Primitive::kPrimNot: {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      // /* HeapReference<Object> */ out =
+      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
+        codegen_->GenerateArrayLoadWithBakerReadBarrier(
+            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+      } else {
+        Register out = out_loc.AsRegister<Register>();
+        if (index.IsConstant()) {
+          uint32_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ movl(out, Address(obj, offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          __ movl(out, Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(
+              instruction, out_loc, out_loc, obj_loc, data_offset, index);
+        }
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      Location out = locations->Out();
-      DCHECK_NE(obj, out.AsRegisterPairLow<Register>());
+      DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ movl(out.AsRegisterPairLow<Register>(), Address(obj, offset));
+        __ movl(out_loc.AsRegisterPairLow<Register>(), Address(obj, offset));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ movl(out.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize));
+        __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize));
       } else {
-        __ movl(out.AsRegisterPairLow<Register>(),
+        __ movl(out_loc.AsRegisterPairLow<Register>(),
                 Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ movl(out.AsRegisterPairHigh<Register>(),
+        __ movl(out_loc.AsRegisterPairHigh<Register>(),
                 Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize));
       }
       break;
@@ -4771,7 +5012,7 @@
 
     case Primitive::kPrimFloat: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movss(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4783,7 +5024,7 @@
 
     case Primitive::kPrimDouble: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movsd(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4798,23 +5039,12 @@
       UNREACHABLE();
   }
 
-  if (type != Primitive::kPrimLong) {
+  if (type == Primitive::kPrimNot || type == Primitive::kPrimLong) {
+    // Potential implicit null checks, in the case of reference or
+    // long arrays, are handled in the previous switch statement.
+  } else {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
-
-  if (type == Primitive::kPrimNot) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-    Location out = locations->Out();
-    if (index.IsConstant()) {
-      uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
-    } else {
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
-    }
-  }
 }
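
Taken together, the reference loads above follow one dispatch, sketched below in hedged plain C++. The helpers are hypothetical, and the `gc_is_marking` test approximates the Baker fast-path check that this diff emits inside GenerateArrayLoadWithBakerReadBarrier; treat it as an assumption, not the exact emitted condition:

    struct HeapRef { void* ptr; };

    static void* Mark(void* ref) { return ref; }             // stand-in for pReadBarrierMark
    static void* SlowReadBarrier(void* ref) { return ref; }  // stand-in for the non-Baker slow path

    void* LoadReferenceSketch(HeapRef* addr, bool emit_read_barrier,
                              bool use_baker, bool gc_is_marking) {
      void* ref = addr->ptr;                 // movl out, Address(obj, offset)
      if (!emit_read_barrier) {
        return ref;                          // plain load, no instrumentation
      }
      if (use_baker) {
        // Baker: the load itself is instrumented; branch to
        // ReadBarrierMarkSlowPathX86 only while the GC is marking.
        if (gc_is_marking) ref = Mark(ref);
      } else {
        // Non-Baker: MaybeGenerateReadBarrierSlow fixes up the loaded
        // reference via ReadBarrierForHeapReferenceSlowPathX86.
        ref = SlowReadBarrier(ref);
      }
      return ref;
    }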
 
 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
@@ -4856,7 +5086,7 @@
     // Temporary registers for the write barrier.
     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
     // Ensure the card is in a byte register.
-    locations->AddTemp(Location::RegisterLocation(ECX));  // Possibly used for read barrier too.
+    locations->AddTemp(Location::RegisterLocation(ECX));
   }
 }
 
@@ -4946,12 +5176,12 @@
           //   __ movl(temp2, temp);
           //   // /* HeapReference<Class> */ temp = temp->component_type_
           //   __ movl(temp, Address(temp, component_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
           //
           //   // /* HeapReference<Class> */ temp2 = register_value->klass_
           //   __ movl(temp2, Address(register_value, class_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
           //
           //   __ cmpl(temp, temp2);
@@ -5232,8 +5462,8 @@
     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   }
 
-  __ fs()->cmpw(Address::Absolute(
-      Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0));
+  __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()),
+                Immediate(0));
   if (successor == nullptr) {
     __ j(kNotEqual, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -5503,6 +5733,7 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
@@ -5513,43 +5744,33 @@
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-      __ leal(out, Address(current_method, declaring_class_offset));
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-      __ movl(out, Address(current_method, declaring_class_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    GenerateGcRootFieldLoad(
+        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
-    DCHECK(cls->CanCallRuntime());
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ movl(out, Address(current_method,
                          ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
+    // /* GcRoot<mirror::Class> */ out = out[type_index]
+    GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
 
-    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
-      __ leal(out, Address(out, cache_offset));
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      __ movl(out, Address(out, cache_offset));
-    }
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
+          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
 
-    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
-        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ testl(out, out);
-    __ j(kEqual, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
-    } else {
-      __ Bind(slow_path->GetExitLabel());
+      if (!cls->IsInDexCache()) {
+        __ testl(out, out);
+        __ j(kEqual, slow_path->GetEntryLabel());
+      }
+
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
@@ -5582,49 +5803,36 @@
 }
 
 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
-  codegen_->AddSlowPath(slow_path);
-
   LocationSummary* locations = load->GetLocations();
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
 
-  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-    __ leal(out, Address(current_method, declaring_class_offset));
-    // /* mirror::Class* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    __ movl(out, Address(current_method, declaring_class_offset));
-  }
-
+  // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+  GenerateGcRootFieldLoad(
+      load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
   __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
+  // /* GcRoot<mirror::String> */ out = out[string_index]
+  GenerateGcRootFieldLoad(
+      load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
 
-  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::String>* */ out = &out[string_index]
-    __ leal(out, Address(out, cache_offset));
-    // /* mirror::String* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::String> */ out = out[string_index]
-    __ movl(out, Address(out, cache_offset));
+  if (!load->IsInDexCache()) {
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
+    codegen_->AddSlowPath(slow_path);
+    __ testl(out, out);
+    __ j(kEqual, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetExitLabel());
   }
-
-  __ testl(out, out);
-  __ j(kEqual, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
 }
 
 static Address GetExceptionTlsAddress() {
@@ -5661,6 +5869,15 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
+}
+
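+// Returns whether the given kind of type check needs an extra temporary
+// register.  With Baker's read barriers the temporary receives the lock
+// word of the reference being loaded; with other (slow path based) read
+// barriers it preserves the original reference across the overwriting
+// load.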
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  return kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
 }
 
 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
@@ -5688,21 +5905,22 @@
   locations->SetOut(Location::RequiresRegister());
   // When read barriers are enabled, we need a temporary register for
   // some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
   Location cls = locations->InAt(1);
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
+  Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(0) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5718,10 +5936,9 @@
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  __ movl(out, Address(obj, class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<Register>());
@@ -5742,17 +5959,8 @@
       // object to avoid doing a comparison we know will fail.
       NearLabel loop;
       __ Bind(&loop);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = temp_loc.AsRegister<Register>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ movl(out, Address(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5781,17 +5989,8 @@
         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
       }
       __ j(kEqual, &success);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = temp_loc.AsRegister<Register>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ movl(out, Address(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       __ testl(out, out);
       __ j(kNotEqual, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -5815,17 +6014,8 @@
       }
       __ j(kEqual, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = temp_loc.AsRegister<Register>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->component_type_
-      __ movl(out, Address(out, component_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5869,6 +6059,13 @@
       // HInstanceOf instruction (following the runtime calling
       // convention), which might be cluttered by the potential first
       // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
                                                                     /* is_fatal */ false);
@@ -5921,27 +6118,27 @@
   locations->AddTemp(Location::RequiresRegister());
   // When read barriers are enabled, we need an additional temporary
   // register for some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
   Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   Register temp = temp_loc.AsRegister<Register>();
+  Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(1) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
 
-  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   bool is_type_check_slow_path_fatal =
       (type_check_kind == TypeCheckKind::kExactCheck ||
        type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -5961,8 +6158,7 @@
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  __ movl(temp, Address(obj, class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
@@ -5984,18 +6180,8 @@
       // object to avoid doing a comparison we know will fail.
       NearLabel loop, compare_classes;
       __ Bind(&loop);
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = temp2_loc.AsRegister<Register>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ movl(temp, Address(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // to the `compare_classes` label to compare it with the checked
@@ -6008,8 +6194,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -6035,18 +6220,8 @@
       }
       __ j(kEqual, &done);
 
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = temp2_loc.AsRegister<Register>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ movl(temp, Address(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
@@ -6058,8 +6233,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6076,19 +6250,8 @@
       __ j(kEqual, &done);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = temp2_loc.AsRegister<Register>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->component_type_
-      __ movl(temp, Address(temp, component_offset));
-      codegen_->MaybeGenerateReadBarrier(
-          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc);
 
       // If the component type is not null (i.e. the object is indeed
       // an array), jump to label `check_non_primitive_component_type`
@@ -6102,8 +6265,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -6111,8 +6273,7 @@
       __ j(kEqual, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6129,6 +6290,13 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
   }
@@ -6150,6 +6318,11 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
@@ -6285,14 +6458,226 @@
   }
 }
 
-void CodeGeneratorX86::GenerateReadBarrier(HInstruction* instruction,
-                                           Location out,
-                                           Location ref,
-                                           Location obj,
-                                           uint32_t offset,
-                                           Location index) {
+void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                                                   Location out,
+                                                                   uint32_t offset,
+                                                                   Location temp) {
+  Register out_reg = out.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, out_reg, offset, temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ movl(temp.AsRegister<Register>(), out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ movl(out_reg, Address(out_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ movl(out_reg, Address(out_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                                                    Location out,
+                                                                    Location obj,
+                                                                    uint32_t offset,
+                                                                    Location temp) {
+  Register out_reg = out.AsRegister<Register>();
+  Register obj_reg = obj.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, obj_reg, offset, temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ movl(out_reg, Address(obj_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ movl(out_reg, Address(obj_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruction,
+                                                          Location root,
+                                                          Register obj,
+                                                          uint32_t offset) {
+  Register root_reg = root.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barriers are used:
+      //
+      //   root = obj.field;
+      //   if (Thread::Current()->GetIsGcMarking()) {
+      //     root = ReadBarrier::Mark(root)
+      //   }
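+      //
+      // The is_gc_marking test below reads the thread-local
+      // is_gc_marking flag in place (via the fs segment register on
+      // x86), so the fast path makes no call.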
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      __ movl(root_reg, Address(obj, offset));
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path used to mark the GC root `root`.
+      SlowPathCode* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root, root);
+      codegen_->AddSlowPath(slow_path);
+
+      __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86WordSize>().Int32Value()),
+                    Immediate(0));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      __ leal(root_reg, Address(obj, offset));
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    __ movl(root_reg, Address(obj, offset));
+  }
+}
+
+void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                             Location ref,
+                                                             Register obj,
+                                                             uint32_t offset,
+                                                             Location temp,
+                                                             bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Address src(obj, offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                             Location ref,
+                                                             Register obj,
+                                                             uint32_t data_offset,
+                                                             Location index,
+                                                             Location temp,
+                                                             bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  Address src = index.IsConstant() ?
+      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
+      Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                 Location ref,
+                                                                 Register obj,
+                                                                 const Address& src,
+                                                                 Location temp,
+                                                                 bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as:
+  // - it implements the load-load fence using a data dependency on
+  //   the high-bits of rb_state, which are expected to be all zeroes;
+  // - it performs additional checks that we do not do here for
+  //   performance reasons.
+
+  Register ref_reg = ref.AsRegister<Register>();
+  Register temp_reg = temp.AsRegister<Register>();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ movl(temp_reg, Address(obj, monitor_offset));
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
+  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
+  __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
+  static_assert(
+      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
+      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
+
+  // Load fence to prevent load-load reordering.
+  // Note that this is a no-op, thanks to the x86 memory model.
+  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+  // The actual reference load.
+  // /* HeapReference<Object> */ ref = *src
+  __ movl(ref_reg, src);
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  __ MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path used to mark the object `ref` when it is gray.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref, ref);
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
+  __ j(kEqual, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
+                                               Location out,
+                                               Location ref,
+                                               Location obj,
+                                               uint32_t offset,
+                                               Location index) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the reference load.
+  //
   // If heap poisoning is enabled, the unpoisoning of the loaded
   // reference will be carried out by the runtime within the slow
   // path.
@@ -6306,57 +6691,41 @@
       ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
   AddSlowPath(slow_path);
 
-  // TODO: When read barrier has a fast path, add it here.
-  /* Currently the read barrier call is inserted after the original load.
-   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
-   * original load. This load-load ordering is required by the read barrier.
-   * The fast path/slow path (for Baker's algorithm) should look like:
-   *
-   * bool isGray = obj.LockWord & kReadBarrierMask;
-   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
-   * ref = obj.field;    // this is the original load
-   * if (isGray) {
-   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
-   * }
-   */
-
   __ jmp(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
-void CodeGeneratorX86::MaybeGenerateReadBarrier(HInstruction* instruction,
-                                                Location out,
-                                                Location ref,
-                                                Location obj,
-                                                uint32_t offset,
-                                                Location index) {
+void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                    Location out,
+                                                    Location ref,
+                                                    Location obj,
+                                                    uint32_t offset,
+                                                    Location index) {
   if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
     // If heap poisoning is enabled, unpoisoning will be taken care of
     // by the runtime within the slow path.
-    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
   } else if (kPoisonHeapReferences) {
     __ UnpoisonHeapReference(out.AsRegister<Register>());
   }
 }
 
-void CodeGeneratorX86::GenerateReadBarrierForRoot(HInstruction* instruction,
-                                                  Location out,
-                                                  Location root) {
+void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                      Location out,
+                                                      Location root) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
   // Note that GC roots are not affected by heap poisoning, so we do
   // not need to do anything special for this here.
   SlowPathCode* slow_path =
       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86(instruction, out, root);
   AddSlowPath(slow_path);
 
-  // TODO: Implement a fast path for ReadBarrierForRoot, performing
-  // the following operation (for Baker's algorithm):
-  //
-  //   if (thread.tls32_.is_gc_marking) {
-  //     root = Mark(root);
-  //   }
-
   __ jmp(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -6390,31 +6759,67 @@
   locations->SetInAt(0, Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
-  int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
-  LocationSummary* locations = switch_instr->GetLocations();
-  Register value_reg = locations->InAt(0).AsRegister<Register>();
-  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
+                                                              int32_t lower_bound,
+                                                              uint32_t num_entries,
+                                                              HBasicBlock* switch_block,
+                                                              HBasicBlock* default_block) {
+  // Figure out the correct compare values and jump conditions.
+  // Handle the first compare/branch as a special case because it might
+  // jump to the default case.
+  DCHECK_GT(num_entries, 2u);
+  Condition first_condition;
+  uint32_t index;
+  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
+  if (lower_bound != 0) {
+    first_condition = kLess;
+    __ cmpl(value_reg, Immediate(lower_bound));
+    __ j(first_condition, codegen_->GetLabelOf(default_block));
+    __ j(kEqual, codegen_->GetLabelOf(successors[0]));
 
-  // Create a series of compare/jumps.
-  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int i = 0; i < num_entries; i++) {
-    int32_t case_value = lower_bound + i;
-    if (case_value == 0) {
-      __ testl(value_reg, value_reg);
-    } else {
-      __ cmpl(value_reg, Immediate(case_value));
-    }
-    __ j(kEqual, codegen_->GetLabelOf(successors[i]));
+    index = 1;
+  } else {
+    // Handle all the compare/jumps below.
+    first_condition = kBelow;
+    index = 0;
+  }
+
+  // Handle the rest of the compare/jumps.
+  for (; index + 1 < num_entries; index += 2) {
+    int32_t compare_to_value = lower_bound + index + 1;
+    __ cmpl(value_reg, Immediate(compare_to_value));
+    // Jump to successors[index] if value < case_value[index + 1]; given
+    // the earlier checks, that can only mean value == case_value[index].
+    __ j(first_condition, codegen_->GetLabelOf(successors[index]));
+    // Jump to successors[index + 1] if value == case_value[index + 1].
+    __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
+  }
+
+  if (index != num_entries) {
+    // There are an odd number of entries. Handle the last one.
+    DCHECK_EQ(index + 1, num_entries);
+    __ cmpl(value_reg, Immediate(lower_bound + index));
+    __ j(kEqual, codegen_->GetLabelOf(successors[index]));
   }
 
   // And the default for any other value.
-  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
-      __ jmp(codegen_->GetLabelOf(default_block));
+  if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
+    __ jmp(codegen_->GetLabelOf(default_block));
   }
 }
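
+// A sketch of the sequence emitted above, assuming lower_bound == 1,
+// num_entries == 3 and successors s0, s1, s2:
+//
+//   __ cmpl(value_reg, Immediate(1));
+//   __ j(kLess, default);   // Value below the range.
+//   __ j(kEqual, s0);       // value == 1.
+//   __ cmpl(value_reg, Immediate(3));
+//   __ j(kLess, s1);        // value == 2.
+//   __ j(kEqual, s2);       // value == 3.
+//   __ jmp(default);        // Unless default is the next block.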
 
+void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  uint32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  Register value_reg = locations->InAt(0).AsRegister<Register>();
+
+  GenPackedSwitchWithCompares(value_reg,
+                              lower_bound,
+                              num_entries,
+                              switch_instr->GetBlock(),
+                              switch_instr->GetDefaultBlock());
+}
+
 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
@@ -6429,11 +6834,20 @@
 
 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
+  uint32_t num_entries = switch_instr->GetNumEntries();
   LocationSummary* locations = switch_instr->GetLocations();
   Register value_reg = locations->InAt(0).AsRegister<Register>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
+  if (num_entries <= kPackedSwitchJumpTableThreshold) {
+    GenPackedSwitchWithCompares(value_reg,
+                                lower_bound,
+                                num_entries,
+                                switch_instr->GetBlock(),
+                                default_block);
+    return;
+  }
+
   // Optimizing has a jump area.
   Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
   Register constant_area = locations->InAt(1).AsRegister<Register>();
@@ -6445,7 +6859,7 @@
   }
 
   // Is the value in range?
-  DCHECK_GE(num_entries, 1);
+  DCHECK_GE(num_entries, 1u);
   __ cmpl(value_reg, Immediate(num_entries - 1));
   __ j(kAbove, codegen_->GetLabelOf(default_block));
 
@@ -6670,7 +7084,7 @@
 // TODO: target as memory.
 void CodeGeneratorX86::MoveFromReturnRegister(Location target, Primitive::Type type) {
   if (!target.IsValid()) {
-    DCHECK(type == Primitive::kPrimVoid);
+    DCHECK_EQ(type, Primitive::kPrimVoid);
     return;
   }
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 064051c..3d34317 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
 
+#include "arch/x86/instruction_set_features_x86.h"
 #include "code_generator.h"
 #include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
@@ -166,6 +167,7 @@
  private:
   void HandleBitwiseOperation(HBinaryOperation* instruction);
   void HandleInvoke(HInvoke* invoke);
+  void HandleCondition(HCondition* condition);
   void HandleShift(HBinaryOperation* instruction);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -195,6 +197,11 @@
 
   X86Assembler* GetAssembler() const { return assembler_; }
 
+  // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
+  // table version generates 7 instructions and num_entries literals. The compare/jump
+  // sequence generates less code/data for a small num_entries.
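+  // For instance, at num_entries == 5 the compares cost about eight
+  // instructions, while a jump table costs seven instructions plus five
+  // table literals.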
+  static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
+
  private:
   // Generate code for the given suspend check. If not null, `successor`
   // is the block to branch to if the suspend check is not needed, and after
@@ -207,6 +214,7 @@
   void DivByPowerOfTwo(HDiv* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateRemFP(HRem* rem);
+  void HandleCondition(HCondition* condition);
   void HandleShift(HBinaryOperation* instruction);
   void GenerateShlLong(const Location& loc, Register shifter);
   void GenerateShrLong(const Location& loc, Register shifter);
@@ -214,11 +222,44 @@
   void GenerateShlLong(const Location& loc, int shift);
   void GenerateShrLong(const Location& loc, int shift);
   void GenerateUShrLong(const Location& loc, int shift);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   void HandleFieldSet(HInstruction* instruction,
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a read barrier.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location temp);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a Baker's read barrier.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location temp);
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               Register obj,
+                               uint32_t offset);
+
   // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
   // `is_wide` specifies whether it is long/double or not.
   void PushOntoFPStack(Location source, uint32_t temp_offset,
@@ -236,6 +277,11 @@
   void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
   void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
+  void GenPackedSwitchWithCompares(Register value_reg,
+                                   int32_t lower_bound,
+                                   uint32_t num_entries,
+                                   HBasicBlock* switch_block,
+                                   HBasicBlock* default_block);
 
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
@@ -354,6 +400,8 @@
                   Register value,
                   bool value_can_be_null);
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   Label* GetLabelOf(HBasicBlock* block) const {
     return CommonGetLabelOf<Label>(block_labels_, block);
   }
@@ -395,7 +443,26 @@
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
 
-  // Generate a read barrier for a heap reference within `instruction`.
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             Register obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             Register obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             Location temp,
+                                             bool needs_null_check);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
   //
   // A read barrier for an object reference read from the heap is
   // implemented as a call to the artReadBarrierSlow runtime entry
@@ -412,23 +479,25 @@
   // When `index` is provided (i.e. for array accesses), the offset
   // value passed to artReadBarrierSlow is adjusted to take `index`
   // into account.
-  void GenerateReadBarrier(HInstruction* instruction,
-                           Location out,
-                           Location ref,
-                           Location obj,
-                           uint32_t offset,
-                           Location index = Location::NoLocation());
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
 
-  // If read barriers are enabled, generate a read barrier for a heap reference.
-  // If heap poisoning is enabled, also unpoison the reference in `out`.
-  void MaybeGenerateReadBarrier(HInstruction* instruction,
-                                Location out,
-                                Location ref,
-                                Location obj,
-                                uint32_t offset,
-                                Location index = Location::NoLocation());
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
 
-  // Generate a read barrier for a GC root within `instruction`.
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
   //
   // A read barrier for an object reference GC root is implemented as
   // a call to the artReadBarrierForRootSlow runtime entry point,
@@ -438,9 +507,31 @@
   //
   // The `out` location contains the value returned by
   // artReadBarrierForRootSlow.
-  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+
+  // Ensure that prior stores complete to memory before subsequent loads.
+  // The locked add implementation will avoid serializing device memory, but will
+  // touch (but not change) the top of the stack.
+  // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores.
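+  // On x86 a locked add to the top of the stack (`lock addl $0, (%esp)`)
+  // provides the same StoreLoad ordering as `mfence` while typically
+  // being cheaper.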
+  void MemoryFence(bool non_temporal = false) {
+    if (!non_temporal && isa_features_.PrefersLockedAddSynchronization()) {
+      assembler_.lock()->addl(Address(ESP, 0), Immediate(0));
+    } else {
+      assembler_.mfence();
+    }
+  }
+
 
  private:
+  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 Register obj,
+                                                 const Address& src,
+                                                 Location temp,
+                                                 bool needs_null_check);
+
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
   struct PcRelativeDexCacheAccessInfo {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 4088160..ffd8c42 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -41,6 +41,10 @@
 
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = RDI;
+// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
+// table version generates 7 instructions and num_entries literals. The compare/jump
+// sequence generates less code/data for a small num_entries.
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
 
 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
@@ -65,6 +69,7 @@
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -91,6 +96,7 @@
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -149,6 +155,7 @@
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
     RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
@@ -203,6 +210,7 @@
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -240,6 +248,11 @@
                                   at_,
                                   dex_pc_,
                                   this);
+    if (do_clinit_) {
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+    } else {
+      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+    }
 
     Location out = locations->Out();
     // Move the class to the desired location.
@@ -290,6 +303,7 @@
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
     RestoreLiveRegisters(codegen, locations);
     __ jmp(GetExitLabel());
@@ -386,6 +400,7 @@
                                   deoptimize,
                                   deoptimize->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
@@ -428,6 +443,7 @@
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
     RestoreLiveRegisters(codegen, locations);
     __ jmp(GetExitLabel());
   }
@@ -440,6 +456,56 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
 };
 
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
+ public:
+  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj)
+      : instruction_(instruction), out_(out), obj_(obj) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast())
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
+    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location obj_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
+};
+
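At run time this marking slow path boils down to a single runtime call: the generated code saves the live registers, moves the reference into the first runtime argument register, calls the pReadBarrierMark entrypoint, and reads the marked reference back from RAX, as the CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>() line records. A sketch of that contract with stand-in names (not ART's runtime API):

using ObjRef = void*;  // stand-in for mirror::Object*

// Contract assumed above: one reference in (RDI under the x86-64
// calling convention), the marked reference out (RAX). The placeholder
// body just returns its argument; the real entrypoint returns the
// marked / to-space version of `ref` while the collector is marking.
ObjRef ReadBarrierMarkStub(ObjRef ref) {
  return ref;
}
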
 // Slow path generating a read barrier for a heap reference.
 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
  public:
@@ -461,7 +527,7 @@
     // reference load to be instrumented, e.g.:
     //
     //   __ movl(out, Address(out, offset));
-    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
     //
     // In that case, we have lost the information about the original
     // object, and the emitted read barrier cannot work properly.
@@ -477,7 +543,9 @@
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
     DCHECK(!instruction_->IsInvoke() ||
            (instruction_->IsInvokeStaticOrDirect() &&
-            instruction_->GetLocations()->Intrinsified()));
+            instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -618,13 +686,17 @@
 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
  public:
   ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
-      : instruction_(instruction), out_(out), root_(root) {}
+      : instruction_(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
-    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -715,10 +787,10 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
       // temp = thread->string_init_entrypoint
       __ gs()->movl(temp.AsRegister<CpuRegister>(),
-                    Address::Absolute(invoke->GetStringInitOffset(), true));
+                    Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress()));
@@ -732,12 +804,12 @@
       pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
                                                   invoke->GetDexCacheArrayOffset());
       __ movq(temp.AsRegister<CpuRegister>(),
-              Address::Absolute(kDummy32BitOffset, false /* no_rip */));
+              Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
       // Bind the label at the end of the "movl" insn.
       __ Bind(&pc_relative_dex_cache_patches_.back().label);
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register method_reg;
       CpuRegister reg = temp.AsRegister<CpuRegister>();
       if (current_method.IsRegister()) {
@@ -790,12 +862,17 @@
   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
+
   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
-  DCHECK(receiver.IsRegister());
   // /* HeapReference<Class> */ temp = receiver->klass_
-  __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
+  __ movl(temp, Address(CpuRegister(receiver), class_offset));
   MaybeRecordImplicitNullCheck(invoke);
   // Instead of simply (possibly) unpoisoning `temp` here, we should
   // emit a read barrier for the previous class reference load.
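
In plain C++ terms, the dispatch sequence this helper emits is: load the receiver's class, index into the class's embedded vtable, and call through the method's entry point. A stand-in sketch (illustrative layout, not ART's real object model):

#include <cstddef>

struct ArtMethodStub { void (*entry_point)(); };
struct ClassStub { ArtMethodStub* vtable[32]; };  // embedded vtable stand-in
struct ObjectStub { ClassStub* klass; };

void InvokeVirtualStub(ObjectStub* receiver, size_t vtable_index) {
  // Faults if `receiver` is null; this load is what the recorded
  // implicit null check above covers.
  ClassStub* klass = receiver->klass;
  ArtMethodStub* method = klass->vtable[vtable_index];
  method->entry_point();
}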
@@ -886,7 +963,7 @@
                                         uint32_t dex_pc,
                                         SlowPathCode* slow_path) {
   ValidateInvokeRuntime(instruction, slow_path);
-  __ gs()->call(Address::Absolute(entry_point_offset, true));
+  __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
@@ -1523,6 +1600,14 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
 void LocationsBuilderX86_64::VisitLocal(HLocal* local) {
   local->SetLocations(nullptr);
 }
@@ -1566,7 +1651,7 @@
 void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
 }
 
-void LocationsBuilderX86_64::VisitCondition(HCondition* cond) {
+void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
   // Handle the long/FP comparisons made in instruction simplification.
@@ -1590,7 +1675,7 @@
   }
 }
 
-void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* cond) {
+void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
   if (!cond->NeedsMaterialization()) {
     return;
   }
@@ -1688,83 +1773,83 @@
 }
 
 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
@@ -1918,7 +2003,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
-  GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
 }
 
 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
@@ -2646,7 +2731,8 @@
           } else {
             DCHECK(in.GetConstant()->IsIntConstant());
             __ movl(out.AsRegister<CpuRegister>(),
-                    Immediate(static_cast<uint16_t>(in.GetConstant()->AsIntConstant()->GetValue())));
+                    Immediate(static_cast<uint16_t>(
+                        in.GetConstant()->AsIntConstant()->GetValue())));
           }
           break;
 
@@ -2890,7 +2976,8 @@
         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ addss(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+                 codegen_->LiteralFloatAddress(
+                     second.GetConstant()->AsFloatConstant()->GetValue()));
       } else {
         DCHECK(second.IsStackSlot());
         __ addss(first.AsFpuRegister<XmmRegister>(),
@@ -2904,7 +2991,8 @@
         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ addsd(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+                 codegen_->LiteralDoubleAddress(
+                     second.GetConstant()->AsDoubleConstant()->GetValue()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ addsd(first.AsFpuRegister<XmmRegister>(),
@@ -2979,7 +3067,8 @@
         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ subss(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+                 codegen_->LiteralFloatAddress(
+                     second.GetConstant()->AsFloatConstant()->GetValue()));
       } else {
         DCHECK(second.IsStackSlot());
         __ subss(first.AsFpuRegister<XmmRegister>(),
@@ -2993,7 +3082,8 @@
         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ subsd(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+                 codegen_->LiteralDoubleAddress(
+                     second.GetConstant()->AsDoubleConstant()->GetValue()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ subsd(first.AsFpuRegister<XmmRegister>(),
@@ -3100,7 +3190,8 @@
         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ mulss(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+                 codegen_->LiteralFloatAddress(
+                     second.GetConstant()->AsFloatConstant()->GetValue()));
       } else {
         DCHECK(second.IsStackSlot());
         __ mulss(first.AsFpuRegister<XmmRegister>(),
@@ -3115,7 +3206,8 @@
         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ mulsd(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+                 codegen_->LiteralDoubleAddress(
+                     second.GetConstant()->AsDoubleConstant()->GetValue()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ mulsd(first.AsFpuRegister<XmmRegister>(),
@@ -3521,7 +3613,8 @@
         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ divss(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+                 codegen_->LiteralFloatAddress(
+                     second.GetConstant()->AsFloatConstant()->GetValue()));
       } else {
         DCHECK(second.IsStackSlot());
         __ divss(first.AsFpuRegister<XmmRegister>(),
@@ -3535,7 +3628,8 @@
         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ divsd(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+                 codegen_->LiteralDoubleAddress(
+                     second.GetConstant()->AsDoubleConstant()->GetValue()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ divsd(first.AsFpuRegister<XmmRegister>(),
@@ -3734,6 +3828,56 @@
     }
     default:
       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86_64::VisitRor(HRor* ror) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL (unless it is a constant).
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+      UNREACHABLE();
+  }
+}
+
+void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
+  CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
+  Location second = locations->InAt(1);
+
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimInt:
+      if (second.IsRegister()) {
+        CpuRegister second_reg = second.AsRegister<CpuRegister>();
+        __ rorl(first_reg, second_reg);
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
+        __ rorl(first_reg, imm);
+      }
+      break;
+    case Primitive::kPrimLong:
+      if (second.IsRegister()) {
+        CpuRegister second_reg = second.AsRegister<CpuRegister>();
+        __ rorq(first_reg, second_reg);
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue);
+        __ rorq(first_reg, imm);
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+      UNREACHABLE();
   }
 }
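
For reference, the operation rorl/rorq implements, written out in portable C++ (a sketch; the masking mirrors the kMaxIntShiftValue/kMaxLongShiftValue masks above and avoids the undefined shift-by-width case):

#include <cstdint>

uint32_t Ror32(uint32_t value, uint32_t distance) {
  distance &= 31u;
  return (value >> distance) | (value << ((32u - distance) & 31u));
}

uint64_t Ror64(uint64_t value, uint64_t distance) {
  distance &= 63u;
  return (value >> distance) | (value << ((64u - distance) & 63u));
}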
 
@@ -3765,22 +3909,19 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(Location::RegisterLocation(RAX));
 }
 
 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
-  InvokeRuntimeCallingConvention calling_convention;
-  codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
-                           instruction->GetTypeIndex());
    // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
-
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
 
   DCHECK(!codegen_->IsLeafMethod());
 }
@@ -3799,13 +3940,13 @@
   InvokeRuntimeCallingConvention calling_convention;
   codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
                            instruction->GetTypeIndex());
-
    // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
 
   DCHECK(!codegen_->IsLeafMethod());
 }
@@ -3892,15 +4033,15 @@
   LOG(FATAL) << "Unimplemented";
 }
 
-void InstructionCodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
   /*
    * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
-   * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
+   * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
    */
   switch (kind) {
     case MemBarrierKind::kAnyAny: {
-      __ mfence();
+      MemoryFence();
       break;
     }
     case MemBarrierKind::kAnyStore:
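
Restating the cookbook point: x86-64's memory model already orders load-load, load-store and store-store, so only the StoreLoad (kAnyAny) case needs a real fence; the remaining kinds only have to stop the compiler from reordering. A portable sketch of the same mapping (local enum stand-in, not ART's MemoryFence()):

#include <atomic>

enum class BarrierKind { kAnyAny, kAnyStore, kLoadAny, kStoreStore };

void EmitBarrier(BarrierKind kind) {
  if (kind == BarrierKind::kAnyAny) {
    // StoreLoad: the one reordering x86-64 permits; needs mfence or an
    // equivalent locked instruction.
    std::atomic_thread_fence(std::memory_order_seq_cst);
  } else {
    // Already ordered by the hardware; a compiler-only barrier suffices.
    std::atomic_signal_fence(std::memory_order_seq_cst);
  }
}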
@@ -3935,6 +4076,11 @@
         Location::RequiresRegister(),
         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
+  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
@@ -3970,12 +4116,36 @@
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
+    case Primitive::kPrimInt: {
       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
       break;
     }
 
+    case Primitive::kPrimNot: {
+      // /* HeapReference<Object> */ out = *(base + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+      } else {
+        __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+        // If read barriers are enabled, emit read barriers other than
+        // Baker's using a slow path (and also unpoison the loaded
+        // reference, if heap poisoning is enabled).
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
       __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
       break;
@@ -3996,14 +4166,20 @@
       UNREACHABLE();
   }
 
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
-
-  if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+  if (field_type == Primitive::kPrimNot) {
+    // Potential implicit null checks, in the case of reference
+    // fields, are handled in the previous switch statement.
+  } else {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
-  if (field_type == Primitive::kPrimNot) {
-    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
+  if (is_volatile) {
+    if (field_type == Primitive::kPrimNot) {
+      // Memory barriers, in the case of references, are also handled
+      // in the previous switch statement.
+    } else {
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+    }
   }
 }
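
The kPrimNot branch now owns its null check and volatile barrier because a Baker load is a multi-instruction sequence whose first instruction carries the implicit null check. For the non-Baker volatile case, the contract amounts to a plain load followed by a LoadAny barrier, which on x86-64 is compiler-only (a sketch, assuming that mapping):

#include <atomic>

void* LoadVolatileRef(void* const* field) {
  void* ref = *field;                                   // movl
  std::atomic_signal_fence(std::memory_order_acquire);  // kLoadAny
  return ref;
}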
 
@@ -4057,7 +4233,7 @@
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   }
 
   bool maybe_record_implicit_null_check_done = false;
@@ -4163,7 +4339,7 @@
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   }
 }
 
@@ -4340,6 +4516,11 @@
         Location::RequiresRegister(),
         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
@@ -4347,12 +4528,13 @@
   Location obj_loc = locations->InAt(0);
   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   Location index = locations->InAt(1);
-  Primitive::Type type = instruction->GetType();
+  Location out_loc = locations->Out();
 
+  Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movzxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4364,7 +4546,7 @@
 
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movsxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4376,7 +4558,7 @@
 
     case Primitive::kPrimShort: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movsxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4388,7 +4570,7 @@
 
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movzxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4398,13 +4580,9 @@
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      static_assert(
-          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    case Primitive::kPrimInt: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movl(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4414,9 +4592,46 @@
       break;
     }
 
+    case Primitive::kPrimNot: {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      // /* HeapReference<Object> */ out =
+      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
+        codegen_->GenerateArrayLoadWithBakerReadBarrier(
+            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+      } else {
+        CpuRegister out = out_loc.AsRegister<CpuRegister>();
+        if (index.IsConstant()) {
+          uint32_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ movl(out, Address(obj, offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(
+              instruction, out_loc, out_loc, obj_loc, data_offset, index);
+        }
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movq(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4428,7 +4643,7 @@
 
     case Primitive::kPrimFloat: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movss(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4440,7 +4655,7 @@
 
     case Primitive::kPrimDouble: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movsd(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4454,20 +4669,12 @@
       LOG(FATAL) << "Unreachable type " << type;
       UNREACHABLE();
   }
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
 
   if (type == Primitive::kPrimNot) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-    Location out = locations->Out();
-    if (index.IsConstant()) {
-      uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
-    } else {
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
-    }
+    // Potential implicit null checks, in the case of reference
+    // arrays, are handled in the previous switch statement.
+  } else {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 }
 
@@ -4500,8 +4707,6 @@
     // This first temporary register is possibly used for heap
     // reference poisoning and/or read barrier emission too.
     locations->AddTemp(Location::RequiresRegister());
-    // This second temporary register is possibly used for read
-    // barrier emission too.
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -4593,12 +4798,12 @@
           //   __ movl(temp2, temp);
           //   // /* HeapReference<Class> */ temp = temp->component_type_
           //   __ movl(temp, Address(temp, component_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
           //
           //   // /* HeapReference<Class> */ temp2 = register_value->klass_
           //   __ movl(temp2, Address(register_value, class_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
           //
           //   __ cmpl(temp, temp2);
@@ -4824,8 +5029,8 @@
     __ testl(value, value);
     __ j(kEqual, &is_null);
   }
-  __ gs()->movq(card, Address::Absolute(
-      Thread::CardTableOffset<kX86_64WordSize>().Int32Value(), true));
+  __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64WordSize>().Int32Value(),
+                                        /* no_rip */ true));
   __ movq(temp, object);
   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
   __ movb(Address(temp, card, TIMES_1, 0), card);
@@ -4884,8 +5089,9 @@
     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   }
 
-  __ gs()->cmpw(Address::Absolute(
-      Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0));
+  __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(),
+                                  /* no_rip */ true),
+                Immediate(0));
   if (successor == nullptr) {
     __ j(kNotEqual, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -5109,7 +5315,7 @@
           Immediate(mirror::Class::kStatusInitialized));
   __ j(kLess, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
-  // No need for memory fence, thanks to the X86_64 memory model.
+  // No need for memory fence, thanks to the x86-64 memory model.
 }
 
 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
@@ -5129,6 +5335,7 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
@@ -5139,43 +5346,31 @@
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-      __ leaq(out, Address(current_method, declaring_class_offset));
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-      __ movl(out, Address(current_method, declaring_class_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    GenerateGcRootFieldLoad(
+        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
-    DCHECK(cls->CanCallRuntime());
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ movq(out, Address(current_method,
                          ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
+    // /* GcRoot<mirror::Class> */ out = out[type_index]
+    GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
 
-    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
-      __ leaq(out, Address(out, cache_offset));
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      __ movl(out, Address(out, cache_offset));
-    }
-
-    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
-        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ testl(out, out);
-    __ j(kEqual, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
-    } else {
-      __ Bind(slow_path->GetExitLabel());
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
+          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+      if (!cls->IsInDexCache()) {
+        __ testl(out, out);
+        __ j(kEqual, slow_path->GetEntryLabel());
+      }
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
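
The restructured VisitLoadClass allocates a slow path only when one can actually be taken. Its control flow, condensed into a standalone trace (booleans stand in for the HLoadClass queries used above):

#include <cstdio>

void EmitLoadClass(bool is_referrers_class, bool in_dex_cache, bool do_clinit) {
  if (is_referrers_class) {
    std::puts("GC-root load of current_method->declaring_class_");
    return;  // never needs the runtime
  }
  std::puts("load dex_cache_resolved_types_, GC-root load of out[type_index]");
  if (!in_dex_cache || do_clinit) {
    std::puts("allocate LoadClassSlowPathX86_64");
    if (!in_dex_cache) {
      std::puts("testl out, out; jz slow_path  (resolve the type)");
    }
    if (do_clinit) {
      std::puts("emit class initialization check against the slow path");
    } else {
      std::puts("bind the slow path's exit label");
    }
  }
}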
@@ -5199,53 +5394,41 @@
 }
 
 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
-  codegen_->AddSlowPath(slow_path);
-
   LocationSummary* locations = load->GetLocations();
   Location out_loc = locations->Out();
   CpuRegister out = out_loc.AsRegister<CpuRegister>();
   CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
 
-  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-    __ leaq(out, Address(current_method, declaring_class_offset));
-    // /* mirror::Class* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    __ movl(out, Address(current_method, declaring_class_offset));
-  }
-
+  // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+  GenerateGcRootFieldLoad(
+      load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
   __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
+  // /* GcRoot<mirror::String> */ out = out[string_index]
+  GenerateGcRootFieldLoad(
+      load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
 
-  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::String>* */ out = &out[string_index]
-    __ leaq(out, Address(out, cache_offset));
-    // /* mirror::String* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::String> */ out = out[string_index]
-    __ movl(out, Address(out, cache_offset));
+  if (!load->IsInDexCache()) {
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
+    codegen_->AddSlowPath(slow_path);
+    __ testl(out, out);
+    __ j(kEqual, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetExitLabel());
   }
-
-  __ testl(out, out);
-  __ j(kEqual, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
 }
 
 static Address GetExceptionTlsAddress() {
-  return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(), true);
+  return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(),
+                           /* no_rip */ true);
 }
 
 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
@@ -5278,6 +5461,15 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
+}
+
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  return kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
 }
 
 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -5305,21 +5497,22 @@
   locations->SetOut(Location::RequiresRegister());
   // When read barriers are enabled, we need a temporary register for
   // some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   Location cls = locations->InAt(1);
   Location out_loc = locations->Out();
   CpuRegister out = out_loc.AsRegister<CpuRegister>();
+  Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(0) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5335,10 +5528,9 @@
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  __ movl(out, Address(obj, class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<CpuRegister>());
@@ -5364,17 +5556,8 @@
       // object to avoid doing a comparison we know will fail.
       NearLabel loop, success;
       __ Bind(&loop);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ movl(out, Address(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5403,17 +5586,8 @@
         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
       __ j(kEqual, &success);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ movl(out, Address(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       __ testl(out, out);
       __ j(kNotEqual, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -5437,17 +5611,8 @@
       }
       __ j(kEqual, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->component_type_
-      __ movl(out, Address(out, component_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5491,6 +5656,13 @@
       // HInstanceOf instruction (following the runtime calling
       // convention), which might be cluttered by the potential first
       // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
                                                                        /* is_fatal */ false);
@@ -5543,27 +5715,27 @@
   locations->AddTemp(Location::RequiresRegister());
   // When read barriers are enabled, we need an additional temporary
   // register for some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+  Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(1) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
 
-  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   bool is_type_check_slow_path_fatal =
       (type_check_kind == TypeCheckKind::kExactCheck ||
        type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -5575,7 +5747,7 @@
                                                            is_type_check_slow_path_fatal);
   codegen_->AddSlowPath(type_check_slow_path);
 
-  NearLabel done;
+  Label done;
   // Avoid null check if we know obj is not null.
   if (instruction->MustDoNullCheck()) {
     __ testl(obj, obj);
@@ -5583,8 +5755,7 @@
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  __ movl(temp, Address(obj, class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
@@ -5606,18 +5777,8 @@
       // object to avoid doing a comparison we know will fail.
       NearLabel loop, compare_classes;
       __ Bind(&loop);
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ movl(temp, Address(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // to the `compare_classes` label to compare it with the checked
@@ -5630,8 +5791,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -5657,18 +5817,8 @@
       }
       __ j(kEqual, &done);
 
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ movl(temp, Address(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
@@ -5680,8 +5830,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -5698,19 +5847,8 @@
       __ j(kEqual, &done);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->component_type_
-      __ movl(temp, Address(temp, component_offset));
-      codegen_->MaybeGenerateReadBarrier(
-          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc);
 
       // If the component type is not null (i.e. the object is indeed
       // an array), jump to label `check_non_primitive_component_type`
@@ -5724,8 +5862,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -5733,8 +5870,7 @@
       __ j(kEqual, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -5751,6 +5887,13 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
   }
@@ -5772,6 +5915,11 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
@@ -5889,14 +6037,227 @@
   }
 }
 
-void CodeGeneratorX86_64::GenerateReadBarrier(HInstruction* instruction,
-                                              Location out,
-                                              Location ref,
-                                              Location obj,
-                                              uint32_t offset,
-                                              Location index) {
+void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                                                      Location out,
+                                                                      uint32_t offset,
+                                                                      Location temp) {
+  CpuRegister out_reg = out.AsRegister<CpuRegister>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, out_reg, offset, temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ movl(temp.AsRegister<CpuRegister>(), out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ movl(out_reg, Address(out_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ movl(out_reg, Address(out_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                                                       Location out,
+                                                                       Location obj,
+                                                                       uint32_t offset,
+                                                                       Location temp) {
+  CpuRegister out_reg = out.AsRegister<CpuRegister>();
+  CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, obj_reg, offset, temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ movl(out_reg, Address(obj_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ movl(out_reg, Address(obj_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
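The asymmetry between these two helpers is worth spelling out: in the one-register form the reference load clobbers the only register holding the object, so the non-Baker slow path must first stash the object in `temp`; in the two-register form the object survives in `obj` and no copy is needed. As a sketch:

    // One register:   temp = out;              // preserve the object for the read barrier
    //                 out  = *(out + offset);  // the load clobbers the object pointer
    // Two registers:  out  = *(obj + offset);  // `obj` still holds the object afterwards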
+
+void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
+                                                             Location root,
+                                                             CpuRegister obj,
+                                                             uint32_t offset) {
+  CpuRegister root_reg = root.AsRegister<CpuRegister>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barriers are used:
+      //
+      //   root = obj.field;
+      //   if (Thread::Current()->GetIsGcMarking()) {
+      //     root = ReadBarrier::Mark(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      __ movl(root_reg, Address(obj, offset));
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path used to mark the GC root `root`.
+      SlowPathCode* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root);
+      codegen_->AddSlowPath(slow_path);
+
+      __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(),
+                                      /* no_rip */ true),
+                    Immediate(0));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      __ leaq(root_reg, Address(obj, offset));
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    __ movl(root_reg, Address(obj, offset));
+  }
+}
+
+void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                Location ref,
+                                                                CpuRegister obj,
+                                                                uint32_t offset,
+                                                                Location temp,
+                                                                bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Address src(obj, offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                Location ref,
+                                                                CpuRegister obj,
+                                                                uint32_t data_offset,
+                                                                Location index,
+                                                                Location temp,
+                                                                bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  Address src = index.IsConstant() ?
+      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
+      Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
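Both addressing modes above compute the same address, obj + data_offset + index * sizeof(HeapReference<Object>), where a heap reference is 4 bytes (TIMES_4 is the scale-factor enum whose value is the shift amount 2). As a sketch, with a hypothetical constant index k:

    // index is a constant k:  src = [obj + data_offset + (k << 2)]
    // index is in a register: src = [obj + index * 4 + data_offset]  // scaled-index addressing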
+
+void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                    Location ref,
+                                                                    CpuRegister obj,
+                                                                    const Address& src,
+                                                                    Location temp,
+                                                                    bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex, as:
+  // - it implements the load-load fence using a data dependency on
+  //   the high-bits of rb_state, which are expected to be all zeroes;
+  // - it performs additional checks that we do not do here for
+  //   performance reasons.
+
+  CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
+  CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ movl(temp_reg, Address(obj, monitor_offset));
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
+  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
+  __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
+  static_assert(
+      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
+      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
+
+  // Load fence to prevent load-load reordering.
+  // Note that this is a no-op, thanks to the x86-64 memory model.
+  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+  // The actual reference load.
+  // /* HeapReference<Object> */ ref = *src
+  __ movl(ref_reg, src);
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  __ MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path used to mark the object `ref` when it is gray.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref);
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
+  __ j(kEqual, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
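As a C-level sketch, the shrl/andl pair and the final compare above implement:

    uint32_t monitor  = obj->monitor_;                           // movl into temp_reg
    uint32_t rb_state = (monitor >> LockWord::kReadBarrierStateShift)
                        & LockWord::kReadBarrierStateMask;       // shrl + andl
    if (rb_state == ReadBarrier::gray_ptr_) {                    // cmpl + je
      ref = ReadBarrier::Mark(ref);                              // done in the mark slow path
    }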
+
+void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
+                                                  Location out,
+                                                  Location ref,
+                                                  Location obj,
+                                                  uint32_t offset,
+                                                  Location index) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the reference load.
+  //
   // If heap poisoning is enabled, the unpoisoning of the loaded
   // reference will be carried out by the runtime within the slow
   // path.
@@ -5910,57 +6271,41 @@
       ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
   AddSlowPath(slow_path);
 
-  // TODO: When read barrier has a fast path, add it here.
-  /* Currently the read barrier call is inserted after the original load.
-   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
-   * original load. This load-load ordering is required by the read barrier.
-   * The fast path/slow path (for Baker's algorithm) should look like:
-   *
-   * bool isGray = obj.LockWord & kReadBarrierMask;
-   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
-   * ref = obj.field;    // this is the original load
-   * if (isGray) {
-   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
-   * }
-   */
-
   __ jmp(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
-void CodeGeneratorX86_64::MaybeGenerateReadBarrier(HInstruction* instruction,
-                                                   Location out,
-                                                   Location ref,
-                                                   Location obj,
-                                                   uint32_t offset,
-                                                   Location index) {
+void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                       Location out,
+                                                       Location ref,
+                                                       Location obj,
+                                                       uint32_t offset,
+                                                       Location index) {
   if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
     // If heap poisoning is enabled, unpoisoning will be taken care of
     // by the runtime within the slow path.
-    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
   } else if (kPoisonHeapReferences) {
     __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
   }
 }
 
-void CodeGeneratorX86_64::GenerateReadBarrierForRoot(HInstruction* instruction,
-                                                     Location out,
-                                                     Location root) {
+void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                         Location out,
+                                                         Location root) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
   // Note that GC roots are not affected by heap poisoning, so we do
   // not need to do anything special for this here.
   SlowPathCode* slow_path =
       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
   AddSlowPath(slow_path);
 
-  // TODO: Implement a fast path for ReadBarrierForRoot, performing
-  // the following operation (for Baker's algorithm):
-  //
-  //   if (thread.tls32_.is_gc_marking) {
-  //     root = Mark(root);
-  //   }
-
   __ jmp(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -5998,11 +6343,58 @@
 
 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
+  uint32_t num_entries = switch_instr->GetNumEntries();
   LocationSummary* locations = switch_instr->GetLocations();
   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  // Should we generate smaller inline compare/jumps?
+  if (num_entries <= kPackedSwitchJumpTableThreshold) {
+    // Figure out the correct compare values and jump conditions.
+    // Handle the first compare/branch as a special case because it might
+    // jump to the default case.
+    DCHECK_GT(num_entries, 2u);
+    Condition first_condition;
+    uint32_t index;
+    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+    if (lower_bound != 0) {
+      first_condition = kLess;
+      __ cmpl(value_reg_in, Immediate(lower_bound));
+      __ j(first_condition, codegen_->GetLabelOf(default_block));
+      __ j(kEqual, codegen_->GetLabelOf(successors[0]));
+
+      index = 1;
+    } else {
+      // Handle all the compare/jumps below.
+      first_condition = kBelow;
+      index = 0;
+    }
+
+    // Handle the rest of the compare/jumps.
+    for (; index + 1 < num_entries; index += 2) {
+      int32_t compare_to_value = lower_bound + index + 1;
+      __ cmpl(value_reg_in, Immediate(compare_to_value));
+      // Jump to successors[index] if value < case_value[index].
+      __ j(first_condition, codegen_->GetLabelOf(successors[index]));
+      // Jump to successors[index + 1] if value == case_value[index + 1].
+      __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
+    }
+
+    if (index != num_entries) {
+      // There is an odd number of entries. Handle the last one.
+      DCHECK_EQ(index + 1, num_entries);
+      __ cmpl(value_reg_in, Immediate(lower_bound + index));
+      __ j(kEqual, codegen_->GetLabelOf(successors[index]));
+    }
+
+    // And the default for any other value.
+    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+      __ jmp(codegen_->GetLabelOf(default_block));
+    }
+    return;
+  }
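A worked sketch of the chain this emits (labels abbreviated; `default` stands for the default block's label): with a hypothetical lower_bound == 1 and num_entries == 4, i.e. cases 1..4 mapping to successors[0..3], the sequence above reduces to:

    __ cmpl(value_reg_in, Immediate(1));  // lower-bound check
    __ j(kLess, default);                 // value < 1: default
    __ j(kEqual, successors[0]);          // value == 1
    __ cmpl(value_reg_in, Immediate(3));  // one loop iteration covers cases 2 and 3
    __ j(kLess, successors[1]);           // 1 < value < 3, i.e. value == 2
    __ j(kEqual, successors[2]);          // value == 3
    __ cmpl(value_reg_in, Immediate(4));  // odd entry count: one trailing compare
    __ j(kEqual, successors[3]);          // value == 4
    __ jmp(default);                      // everything else

Roughly one compare serves two cases, versus one compare plus an indirect jump (and the jump table itself) for the table-based path below.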
 
   // Remove the bias, if needed.
   Register value_reg_out = value_reg_in.AsRegister();
@@ -6013,7 +6405,6 @@
   CpuRegister value_reg(value_reg_out);
 
   // Is the value in range?
-  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
   __ cmpl(value_reg, Immediate(num_entries - 1));
   __ j(kAbove, codegen_->GetLabelOf(default_block));
 
@@ -6163,7 +6554,7 @@
 // TODO: trg as memory.
 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   if (!trg.IsValid()) {
-    DCHECK(type == Primitive::kPrimVoid);
+    DCHECK_EQ(type, Primitive::kPrimVoid);
     return;
   }
 
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 145b1f3..9995416 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
 
+#include "arch/x86_64/instruction_set_features_x86_64.h"
 #include "code_generator.h"
 #include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
@@ -47,6 +48,12 @@
 static constexpr size_t kRuntimeParameterFpuRegistersLength =
     arraysize(kRuntimeParameterFpuRegisters);
 
+// These XMM registers are non-volatile in the ART ABI, but volatile in the native ABI.
+// If the ART ABI changes, this list must be updated.  It is used to ensure that
+// these are not clobbered by any direct call to native code (such as math intrinsics).
+static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 };
+
+
 class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
  public:
   InvokeRuntimeCallingConvention()
@@ -165,6 +172,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleCondition(HCondition* condition);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction);
@@ -206,12 +214,46 @@
   void DivByPowerOfTwo(HDiv* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
+  void HandleCondition(HCondition* condition);
   void HandleShift(HBinaryOperation* operation);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   void HandleFieldSet(HInstruction* instruction,
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a read barrier.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location temp);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a Baker's read barrier.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location temp);
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               CpuRegister obj,
+                               uint32_t offset);
+
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
   void PushOntoFPStack(Location source, uint32_t temp_offset,
@@ -318,6 +360,8 @@
                   CpuRegister value,
                   bool value_can_be_null);
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   // Helper method to move a value between two locations.
   void Move(Location destination, Location source);
 
@@ -350,7 +394,26 @@
     return isa_features_;
   }
 
-  // Generate a read barrier for a heap reference within `instruction`.
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             CpuRegister obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             CpuRegister obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             Location temp,
+                                             bool needs_null_check);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
   //
   // A read barrier for an object reference read from the heap is
   // implemented as a call to the artReadBarrierSlow runtime entry
@@ -367,23 +430,25 @@
   // When `index` is provided (i.e., when it is different from
   // Location::NoLocation()), the offset value passed to
   // artReadBarrierSlow is adjusted to take `index` into account.
-  void GenerateReadBarrier(HInstruction* instruction,
-                           Location out,
-                           Location ref,
-                           Location obj,
-                           uint32_t offset,
-                           Location index = Location::NoLocation());
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
 
-  // If read barriers are enabled, generate a read barrier for a heap reference.
-  // If heap poisoning is enabled, also unpoison the reference in `out`.
-  void MaybeGenerateReadBarrier(HInstruction* instruction,
-                                Location out,
-                                Location ref,
-                                Location obj,
-                                uint32_t offset,
-                                Location index = Location::NoLocation());
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
 
-  // Generate a read barrier for a GC root within `instruction`.
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
   //
   // A read barrier for an object reference GC root is implemented as
   // a call to the artReadBarrierForRootSlow runtime entry point,
@@ -393,7 +458,7 @@
   //
   // The `out` location contains the value returned by
   // artReadBarrierForRootSlow.
-  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
 
   int ConstantAreaStart() const {
     return constant_area_start_;
@@ -417,7 +482,28 @@
                           int64_t v,
                           HInstruction* instruction);
 
+  // Ensure that prior stores complete to memory before subsequent loads.
+  // The locked add implementation will avoid serializing device memory, but will
+  // touch (but not change) the top of the stack. The locked add should not be used for
+  // ordering non-temporal stores.
+  void MemoryFence(bool force_mfence = false) {
+    if (!force_mfence && isa_features_.PrefersLockedAddSynchronization()) {
+      assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
+    } else {
+      assembler_.mfence();
+    }
+  }
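As a sketch, the two instruction forms this helper can assemble are:

    lock addl $0, (%rsp)   // preferred: full StoreLoad fence that only touches the hot top-of-stack line
    mfence                 // fallback; also the right choice when ordering non-temporal stores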
+
  private:
+  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 CpuRegister obj,
+                                                 const Address& src,
+                                                 Location temp,
+                                                 bool needs_null_check);
+
   struct PcRelativeDexCacheAccessInfo {
     PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
         : target_dex_file(dex_file), element_offset(element_off), label() { }
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 57de41f..d970704 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -35,6 +35,7 @@
 #include "code_generator_mips64.h"
 #include "code_generator_x86.h"
 #include "code_generator_x86_64.h"
+#include "code_simulator_container.h"
 #include "common_compiler_test.h"
 #include "dex_file.h"
 #include "dex_instruction.h"
@@ -124,26 +125,85 @@
   DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
 };
 
+static bool CanExecuteOnHardware(InstructionSet target_isa) {
+  return (target_isa == kRuntimeISA)
+      // Handle the special case of ARM, with two instruction sets (ARM32 and Thumb-2).
+      || (kRuntimeISA == kArm && target_isa == kThumb2);
+}
+
+static bool CanExecute(InstructionSet target_isa) {
+  CodeSimulatorContainer simulator(target_isa);
+  return CanExecuteOnHardware(target_isa) || simulator.CanSimulate();
+}
+
+template <typename Expected>
+static Expected SimulatorExecute(CodeSimulator* simulator, Expected (*f)());
+
+template <>
+bool SimulatorExecute<bool>(CodeSimulator* simulator, bool (*f)()) {
+  simulator->RunFrom(reinterpret_cast<intptr_t>(f));
+  return simulator->GetCReturnBool();
+}
+
+template <>
+int32_t SimulatorExecute<int32_t>(CodeSimulator* simulator, int32_t (*f)()) {
+  simulator->RunFrom(reinterpret_cast<intptr_t>(f));
+  return simulator->GetCReturnInt32();
+}
+
+template <>
+int64_t SimulatorExecute<int64_t>(CodeSimulator* simulator, int64_t (*f)()) {
+  simulator->RunFrom(reinterpret_cast<intptr_t>(f));
+  return simulator->GetCReturnInt64();
+}
+
+template <typename Expected>
+static void VerifyGeneratedCode(InstructionSet target_isa,
+                                Expected (*f)(),
+                                bool has_result,
+                                Expected expected) {
+  ASSERT_TRUE(CanExecute(target_isa)) << "Target isa is not executable.";
+
+  // Verify on simulator.
+  CodeSimulatorContainer simulator(target_isa);
+  if (simulator.CanSimulate()) {
+    Expected result = SimulatorExecute<Expected>(simulator.Get(), f);
+    if (has_result) {
+      ASSERT_EQ(expected, result);
+    }
+  }
+
+  // Verify on hardware.
+  if (CanExecuteOnHardware(target_isa)) {
+    Expected result = f();
+    if (has_result) {
+      ASSERT_EQ(expected, result);
+    }
+  }
+}
+
 template <typename Expected>
 static void Run(const InternalCodeAllocator& allocator,
                 const CodeGenerator& codegen,
                 bool has_result,
                 Expected expected) {
+  InstructionSet target_isa = codegen.GetInstructionSet();
+
   typedef Expected (*fptr)();
   CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
   fptr f = reinterpret_cast<fptr>(allocator.GetMemory());
-  if (codegen.GetInstructionSet() == kThumb2) {
+  if (target_isa == kThumb2) {
     // For thumb we need the bottom bit set.
     f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
   }
-  Expected result = f();
-  if (has_result) {
-    ASSERT_EQ(expected, result);
-  }
+  VerifyGeneratedCode(target_isa, f, has_result, expected);
 }
 
 template <typename Expected>
-static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
+static void RunCodeBaseline(InstructionSet target_isa,
+                            HGraph* graph,
+                            bool has_result,
+                            Expected expected) {
   InternalCodeAllocator allocator;
 
   CompilerOptions compiler_options;
@@ -153,7 +213,7 @@
   // We avoid doing a stack overflow check that requires the runtime being setup,
   // by making sure the compiler knows the methods we are running are leaf methods.
   codegenX86.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kX86) {
+  if (target_isa == kX86) {
     Run(allocator, codegenX86, has_result, expected);
   }
 
@@ -161,7 +221,7 @@
       ArmInstructionSetFeatures::FromCppDefines());
   TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
   codegenARM.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
+  if (target_isa == kArm || target_isa == kThumb2) {
     Run(allocator, codegenARM, has_result, expected);
   }
 
@@ -169,7 +229,7 @@
       X86_64InstructionSetFeatures::FromCppDefines());
   x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
   codegenX86_64.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kX86_64) {
+  if (target_isa == kX86_64) {
     Run(allocator, codegenX86_64, has_result, expected);
   }
 
@@ -177,7 +237,7 @@
       Arm64InstructionSetFeatures::FromCppDefines());
   arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
   codegenARM64.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kArm64) {
+  if (target_isa == kArm64) {
     Run(allocator, codegenARM64, has_result, expected);
   }
 
@@ -193,7 +253,7 @@
       Mips64InstructionSetFeatures::FromCppDefines());
   mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
   codegenMIPS64.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kMips64) {
+  if (target_isa == kMips64) {
     Run(allocator, codegenMIPS64, has_result, expected);
   }
 }
@@ -221,37 +281,38 @@
 }
 
 template <typename Expected>
-static void RunCodeOptimized(HGraph* graph,
+static void RunCodeOptimized(InstructionSet target_isa,
+                             HGraph* graph,
                              std::function<void(HGraph*)> hook_before_codegen,
                              bool has_result,
                              Expected expected) {
   CompilerOptions compiler_options;
-  if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
-    TestCodeGeneratorARM codegenARM(graph,
-                                    *ArmInstructionSetFeatures::FromCppDefines(),
-                                    compiler_options);
+  if (target_isa == kArm || target_isa == kThumb2) {
+    std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
+        ArmInstructionSetFeatures::FromCppDefines());
+    TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
     RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
-  } else if (kRuntimeISA == kArm64) {
-    arm64::CodeGeneratorARM64 codegenARM64(graph,
-                                           *Arm64InstructionSetFeatures::FromCppDefines(),
-                                           compiler_options);
+  } else if (target_isa == kArm64) {
+    std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
+        Arm64InstructionSetFeatures::FromCppDefines());
+    arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
     RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
-  } else if (kRuntimeISA == kX86) {
+  } else if (target_isa == kX86) {
     std::unique_ptr<const X86InstructionSetFeatures> features_x86(
         X86InstructionSetFeatures::FromCppDefines());
     x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
     RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected);
-  } else if (kRuntimeISA == kX86_64) {
+  } else if (target_isa == kX86_64) {
     std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
         X86_64InstructionSetFeatures::FromCppDefines());
     x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
     RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
-  } else if (kRuntimeISA == kMips) {
+  } else if (target_isa == kMips) {
     std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
         MipsInstructionSetFeatures::FromCppDefines());
     mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
     RunCodeOptimized(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
-  } else if (kRuntimeISA == kMips64) {
+  } else if (target_isa == kMips64) {
     std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
         Mips64InstructionSetFeatures::FromCppDefines());
     mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
@@ -259,7 +320,10 @@
   }
 }
 
-static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) {
+static void TestCode(InstructionSet target_isa,
+                     const uint16_t* data,
+                     bool has_result = false,
+                     int32_t expected = 0) {
   ArenaPool pool;
   ArenaAllocator arena(&pool);
   HGraph* graph = CreateGraph(&arena);
@@ -269,10 +333,13 @@
   ASSERT_TRUE(graph_built);
   // Remove suspend checks, they cannot be executed in this context.
   RemoveSuspendChecks(graph);
-  RunCodeBaseline(graph, has_result, expected);
+  RunCodeBaseline(target_isa, graph, has_result, expected);
 }
 
-static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) {
+static void TestCodeLong(InstructionSet target_isa,
+                         const uint16_t* data,
+                         bool has_result,
+                         int64_t expected) {
   ArenaPool pool;
   ArenaAllocator arena(&pool);
   HGraph* graph = CreateGraph(&arena);
@@ -282,108 +349,110 @@
   ASSERT_TRUE(graph_built);
   // Remove suspend checks, they cannot be executed in this context.
   RemoveSuspendChecks(graph);
-  RunCodeBaseline(graph, has_result, expected);
+  RunCodeBaseline(target_isa, graph, has_result, expected);
 }
 
-TEST(CodegenTest, ReturnVoid) {
+class CodegenTest: public ::testing::TestWithParam<InstructionSet> {};
+
+TEST_P(CodegenTest, ReturnVoid) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID);
-  TestCode(data);
+  TestCode(GetParam(), data);
 }
 
-TEST(CodegenTest, CFG1) {
+TEST_P(CodegenTest, CFG1) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,
     Instruction::RETURN_VOID);
 
-  TestCode(data);
+  TestCode(GetParam(), data);
 }
 
-TEST(CodegenTest, CFG2) {
+TEST_P(CodegenTest, CFG2) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,
     Instruction::GOTO | 0x100,
     Instruction::RETURN_VOID);
 
-  TestCode(data);
+  TestCode(GetParam(), data);
 }
 
-TEST(CodegenTest, CFG3) {
+TEST_P(CodegenTest, CFG3) {
   const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x200,
     Instruction::RETURN_VOID,
     Instruction::GOTO | 0xFF00);
 
-  TestCode(data1);
+  TestCode(GetParam(), data1);
 
   const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO_16, 3,
     Instruction::RETURN_VOID,
     Instruction::GOTO_16, 0xFFFF);
 
-  TestCode(data2);
+  TestCode(GetParam(), data2);
 
   const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO_32, 4, 0,
     Instruction::RETURN_VOID,
     Instruction::GOTO_32, 0xFFFF, 0xFFFF);
 
-  TestCode(data3);
+  TestCode(GetParam(), data3);
 }
 
-TEST(CodegenTest, CFG4) {
+TEST_P(CodegenTest, CFG4) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::RETURN_VOID,
     Instruction::GOTO | 0x100,
     Instruction::GOTO | 0xFE00);
 
-  TestCode(data);
+  TestCode(GetParam(), data);
 }
 
-TEST(CodegenTest, CFG5) {
+TEST_P(CodegenTest, CFG5) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
     Instruction::GOTO | 0x100,
     Instruction::RETURN_VOID);
 
-  TestCode(data);
+  TestCode(GetParam(), data);
 }
 
-TEST(CodegenTest, IntConstant) {
+TEST_P(CodegenTest, IntConstant) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN_VOID);
 
-  TestCode(data);
+  TestCode(GetParam(), data);
 }
 
-TEST(CodegenTest, Return1) {
+TEST_P(CodegenTest, Return1) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN | 0);
 
-  TestCode(data, true, 0);
+  TestCode(GetParam(), data, true, 0);
 }
 
-TEST(CodegenTest, Return2) {
+TEST_P(CodegenTest, Return2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 0 | 1 << 8,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(data, true, 0);
+  TestCode(GetParam(), data, true, 0);
 }
 
-TEST(CodegenTest, Return3) {
+TEST_P(CodegenTest, Return3) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(data, true, 1);
+  TestCode(GetParam(), data, true, 1);
 }
 
-TEST(CodegenTest, ReturnIf1) {
+TEST_P(CodegenTest, ReturnIf1) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -391,10 +460,10 @@
     Instruction::RETURN | 0 << 8,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(data, true, 1);
+  TestCode(GetParam(), data, true, 1);
 }
 
-TEST(CodegenTest, ReturnIf2) {
+TEST_P(CodegenTest, ReturnIf2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -402,12 +471,12 @@
     Instruction::RETURN | 0 << 8,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(data, true, 0);
+  TestCode(GetParam(), data, true, 0);
 }
 
 // Exercise bit-wise (one's complement) not-int instruction.
 #define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \
-TEST(CodegenTest, TEST_NAME) {                          \
+TEST_P(CodegenTest, TEST_NAME) {                        \
   const int32_t input = INPUT;                          \
   const uint16_t input_lo = Low16Bits(input);           \
   const uint16_t input_hi = High16Bits(input);          \
@@ -416,7 +485,7 @@
       Instruction::NOT_INT | 1 << 8 | 0 << 12 ,         \
       Instruction::RETURN | 1 << 8);                    \
                                                         \
-  TestCode(data, true, EXPECTED_OUTPUT);                \
+  TestCode(GetParam(), data, true, EXPECTED_OUTPUT);    \
 }
 
 NOT_INT_TEST(ReturnNotIntMinus2, -2, 1)
@@ -432,7 +501,7 @@
 
 // Exercise bit-wise (one's complement) not-long instruction.
 #define NOT_LONG_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT)                 \
-TEST(CodegenTest, TEST_NAME) {                                           \
+TEST_P(CodegenTest, TEST_NAME) {                                         \
   const int64_t input = INPUT;                                           \
   const uint16_t word0 = Low16Bits(Low32Bits(input));   /* LSW. */       \
   const uint16_t word1 = High16Bits(Low32Bits(input));                   \
@@ -443,7 +512,7 @@
       Instruction::NOT_LONG | 2 << 8 | 0 << 12,                          \
       Instruction::RETURN_WIDE | 2 << 8);                                \
                                                                          \
-  TestCodeLong(data, true, EXPECTED_OUTPUT);                             \
+  TestCodeLong(GetParam(), data, true, EXPECTED_OUTPUT);                 \
 }
 
 NOT_LONG_TEST(ReturnNotLongMinus2, INT64_C(-2), INT64_C(1))
@@ -482,7 +551,7 @@
 
 #undef NOT_LONG_TEST
 
-TEST(CodegenTest, IntToLongOfLongToInt) {
+TEST_P(CodegenTest, IntToLongOfLongToInt) {
   const int64_t input = INT64_C(4294967296);             // 2^32
   const uint16_t word0 = Low16Bits(Low32Bits(input));    // LSW.
   const uint16_t word1 = High16Bits(Low32Bits(input));
@@ -496,48 +565,48 @@
       Instruction::INT_TO_LONG | 2 << 8 | 4 << 12,
       Instruction::RETURN_WIDE | 2 << 8);
 
-  TestCodeLong(data, true, 1);
+  TestCodeLong(GetParam(), data, true, 1);
 }
 
-TEST(CodegenTest, ReturnAdd1) {
+TEST_P(CodegenTest, ReturnAdd1) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::ADD_INT, 1 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(data, true, 7);
+  TestCode(GetParam(), data, true, 7);
 }
 
-TEST(CodegenTest, ReturnAdd2) {
+TEST_P(CodegenTest, ReturnAdd2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::ADD_INT_2ADDR | 1 << 12,
     Instruction::RETURN);
 
-  TestCode(data, true, 7);
+  TestCode(GetParam(), data, true, 7);
 }
 
-TEST(CodegenTest, ReturnAdd3) {
+TEST_P(CodegenTest, ReturnAdd3) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::ADD_INT_LIT8, 3 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(data, true, 7);
+  TestCode(GetParam(), data, true, 7);
 }
 
-TEST(CodegenTest, ReturnAdd4) {
+TEST_P(CodegenTest, ReturnAdd4) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::ADD_INT_LIT16, 3,
     Instruction::RETURN);
 
-  TestCode(data, true, 7);
+  TestCode(GetParam(), data, true, 7);
 }
 
-TEST(CodegenTest, NonMaterializedCondition) {
+TEST_P(CodegenTest, NonMaterializedCondition) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
@@ -583,30 +652,30 @@
     block->InsertInstructionBefore(move, block->GetLastInstruction());
   };
 
-  RunCodeOptimized(graph, hook_before_codegen, true, 0);
+  RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, 0);
 }
 
-TEST(CodegenTest, ReturnMulInt) {
+TEST_P(CodegenTest, ReturnMulInt) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::MUL_INT, 1 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(data, true, 12);
+  TestCode(GetParam(), data, true, 12);
 }
 
-TEST(CodegenTest, ReturnMulInt2addr) {
+TEST_P(CodegenTest, ReturnMulInt2addr) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::MUL_INT_2ADDR | 1 << 12,
     Instruction::RETURN);
 
-  TestCode(data, true, 12);
+  TestCode(GetParam(), data, true, 12);
 }
 
-TEST(CodegenTest, ReturnMulLong) {
+TEST_P(CodegenTest, ReturnMulLong) {
   const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 0 << 12 | 1 << 8,
@@ -615,10 +684,10 @@
     Instruction::MUL_LONG, 2 << 8 | 0,
     Instruction::RETURN_WIDE);
 
-  TestCodeLong(data, true, 12);
+  TestCodeLong(GetParam(), data, true, 12);
 }
 
-TEST(CodegenTest, ReturnMulLong2addr) {
+TEST_P(CodegenTest, ReturnMulLong2addr) {
   const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0 << 8,
     Instruction::CONST_4 | 0 << 12 | 1 << 8,
@@ -627,28 +696,28 @@
     Instruction::MUL_LONG_2ADDR | 2 << 12,
     Instruction::RETURN_WIDE);
 
-  TestCodeLong(data, true, 12);
+  TestCodeLong(GetParam(), data, true, 12);
 }
 
-TEST(CodegenTest, ReturnMulIntLit8) {
+TEST_P(CodegenTest, ReturnMulIntLit8) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::MUL_INT_LIT8, 3 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(data, true, 12);
+  TestCode(GetParam(), data, true, 12);
 }
 
-TEST(CodegenTest, ReturnMulIntLit16) {
+TEST_P(CodegenTest, ReturnMulIntLit16) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::MUL_INT_LIT16, 3,
     Instruction::RETURN);
 
-  TestCode(data, true, 12);
+  TestCode(GetParam(), data, true, 12);
 }
 
-TEST(CodegenTest, MaterializedCondition1) {
+TEST_P(CodegenTest, MaterializedCondition1) {
   // Check that conditions are materialized correctly. A materialized condition
   // should yield `1` if it evaluates to true, and `0` otherwise.
   // We force the materialization of comparisons for different combinations of
@@ -689,11 +758,11 @@
       block->InsertInstructionBefore(move, block->GetLastInstruction());
     };
 
-    RunCodeOptimized(graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+    RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]);
   }
 }
 
-TEST(CodegenTest, MaterializedCondition2) {
+TEST_P(CodegenTest, MaterializedCondition2) {
   // Check that HIf correctly interprets a materialized condition.
   // We force the materialization of comparisons for different combinations of
   // inputs. An HIf takes the materialized combination as input and returns a
@@ -755,31 +824,35 @@
       block->InsertInstructionBefore(move, block->GetLastInstruction());
     };
 
-    RunCodeOptimized(graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+    RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]);
   }
 }
 
-TEST(CodegenTest, ReturnDivIntLit8) {
+TEST_P(CodegenTest, ReturnDivIntLit8) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::DIV_INT_LIT8, 3 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(data, true, 1);
+  TestCode(GetParam(), data, true, 1);
 }
 
-TEST(CodegenTest, ReturnDivInt2Addr) {
+TEST_P(CodegenTest, ReturnDivInt2Addr) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0,
     Instruction::CONST_4 | 2 << 12 | 1 << 8,
     Instruction::DIV_INT_2ADDR | 1 << 12,
     Instruction::RETURN);
 
-  TestCode(data, true, 2);
+  TestCode(GetParam(), data, true, 2);
 }
 
 // Helper method.
-static void TestComparison(IfCondition condition, int64_t i, int64_t j, Primitive::Type type) {
+static void TestComparison(IfCondition condition,
+                           int64_t i,
+                           int64_t j,
+                           Primitive::Type type,
+                           const InstructionSet target_isa) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = CreateGraph(&allocator);
@@ -862,46 +935,78 @@
 
   auto hook_before_codegen = [](HGraph*) {
   };
-  RunCodeOptimized(graph, hook_before_codegen, true, expected_result);
+  RunCodeOptimized(target_isa, graph, hook_before_codegen, true, expected_result);
 }
 
-TEST(CodegenTest, ComparisonsInt) {
+TEST_P(CodegenTest, ComparisonsInt) {
+  const InstructionSet target_isa = GetParam();
   for (int64_t i = -1; i <= 1; i++) {
     for (int64_t j = -1; j <= 1; j++) {
-      TestComparison(kCondEQ, i, j, Primitive::kPrimInt);
-      TestComparison(kCondNE, i, j, Primitive::kPrimInt);
-      TestComparison(kCondLT, i, j, Primitive::kPrimInt);
-      TestComparison(kCondLE, i, j, Primitive::kPrimInt);
-      TestComparison(kCondGT, i, j, Primitive::kPrimInt);
-      TestComparison(kCondGE, i, j, Primitive::kPrimInt);
-      TestComparison(kCondB,  i, j, Primitive::kPrimInt);
-      TestComparison(kCondBE, i, j, Primitive::kPrimInt);
-      TestComparison(kCondA,  i, j, Primitive::kPrimInt);
-      TestComparison(kCondAE, i, j, Primitive::kPrimInt);
+      TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondB,  i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondA,  i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa);
     }
   }
 }
 
-TEST(CodegenTest, ComparisonsLong) {
+TEST_P(CodegenTest, ComparisonsLong) {
   // TODO: make MIPS work for long
   if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
     return;
   }
 
+  const InstructionSet target_isa = GetParam();
+  if (target_isa == kMips || target_isa == kMips64) {
+    return;
+  }
+
   for (int64_t i = -1; i <= 1; i++) {
     for (int64_t j = -1; j <= 1; j++) {
-      TestComparison(kCondEQ, i, j, Primitive::kPrimLong);
-      TestComparison(kCondNE, i, j, Primitive::kPrimLong);
-      TestComparison(kCondLT, i, j, Primitive::kPrimLong);
-      TestComparison(kCondLE, i, j, Primitive::kPrimLong);
-      TestComparison(kCondGT, i, j, Primitive::kPrimLong);
-      TestComparison(kCondGE, i, j, Primitive::kPrimLong);
-      TestComparison(kCondB,  i, j, Primitive::kPrimLong);
-      TestComparison(kCondBE, i, j, Primitive::kPrimLong);
-      TestComparison(kCondA,  i, j, Primitive::kPrimLong);
-      TestComparison(kCondAE, i, j, Primitive::kPrimLong);
+      TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondB,  i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondA,  i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa);
     }
   }
 }
 
+static ::std::vector<InstructionSet> GetTargetISAs() {
+  ::std::vector<InstructionSet> v;
+  // Add all ISAs that are executable on hardware or on simulator.
+  const ::std::vector<InstructionSet> executable_isa_candidates = {
+    kArm,
+    kArm64,
+    kThumb2,
+    kX86,
+    kX86_64,
+    kMips,
+    kMips64
+  };
+
+  for (auto target_isa : executable_isa_candidates) {
+    if (CanExecute(target_isa)) {
+      v.push_back(target_isa);
+    }
+  }
+
+  return v;
+}
+
+INSTANTIATE_TEST_CASE_P(MultipleTargets,
+                        CodegenTest,
+                        ::testing::ValuesIn(GetTargetISAs()));
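With this instantiation, every TEST_P above runs once per ISA returned by GetTargetISAs(); under standard gtest value-parameterized naming the instances appear as, e.g. (a sketch, indices depend on the vector order):

    MultipleTargets/CodegenTest.ReturnVoid/0   // first executable ISA
    MultipleTargets/CodegenTest.ReturnVoid/1   // second executable ISA, and so on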
+
 }  // namespace art
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index e1a8c9c..10d8343 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_
 #define ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_
 
+#include "code_generator.h"
 #include "locations.h"
 #include "nodes.h"
 #include "utils/arm64/assembler_arm64.h"
@@ -201,6 +202,11 @@
     return true;
   }
 
+  // Our code generator ensures shift distances are within an encodable range.
+  if (instr->IsRor()) {
+    return true;
+  }
+
   int64_t value = CodeGenerator::GetInt64ValueOf(constant);
 
   if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
@@ -255,6 +261,67 @@
   return true;
 }
 
+static inline vixl::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+  switch (op_kind) {
+    case HArm64DataProcWithShifterOp::kASR: return vixl::ASR;
+    case HArm64DataProcWithShifterOp::kLSL: return vixl::LSL;
+    case HArm64DataProcWithShifterOp::kLSR: return vixl::LSR;
+    default:
+      LOG(FATAL) << "Unexpected op kind " << op_kind;
+      UNREACHABLE();
+      return vixl::NO_SHIFT;
+  }
+}
+
+static inline vixl::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+  switch (op_kind) {
+    case HArm64DataProcWithShifterOp::kUXTB: return vixl::UXTB;
+    case HArm64DataProcWithShifterOp::kUXTH: return vixl::UXTH;
+    case HArm64DataProcWithShifterOp::kUXTW: return vixl::UXTW;
+    case HArm64DataProcWithShifterOp::kSXTB: return vixl::SXTB;
+    case HArm64DataProcWithShifterOp::kSXTH: return vixl::SXTH;
+    case HArm64DataProcWithShifterOp::kSXTW: return vixl::SXTW;
+    default:
+      LOG(FATAL) << "Unexpected op kind " << op_kind;
+      UNREACHABLE();
+      return vixl::NO_EXTEND;
+  }
+}
+
+static inline bool CanFitInShifterOperand(HInstruction* instruction) {
+  if (instruction->IsTypeConversion()) {
+    HTypeConversion* conversion = instruction->AsTypeConversion();
+    Primitive::Type result_type = conversion->GetResultType();
+    Primitive::Type input_type = conversion->GetInputType();
+    // We don't expect to see the same type as input and result.
+    return Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type) &&
+        (result_type != input_type);
+  } else {
+    return (instruction->IsShl() && instruction->AsShl()->InputAt(1)->IsIntConstant()) ||
+        (instruction->IsShr() && instruction->AsShr()->InputAt(1)->IsIntConstant()) ||
+        (instruction->IsUShr() && instruction->AsUShr()->InputAt(1)->IsIntConstant());
+  }
+}
+
+static inline bool HasShifterOperand(HInstruction* instr) {
+  // `neg` instructions are an alias of `sub` using the zero register as the
+  // first register input.
+  bool res = instr->IsAdd() || instr->IsAnd() || instr->IsNeg() ||
+      instr->IsOr() || instr->IsSub() || instr->IsXor();
+  return res;
+}
+
+static inline bool ShifterOperandSupportsExtension(HInstruction* instruction) {
+  DCHECK(HasShifterOperand(instruction));
+  // Although the `neg` instruction is an alias of the `sub` instruction, `HNeg`
+  // does *not* support extension. This is because the `extended register` form
+  // of the `sub` instruction interprets the left register with code 31 as the
+  // stack pointer and not the zero register. (So does the `immediate` form.) In
+  // the other form, `shifted register`, the register with code 31 is interpreted
+  // as the zero register.
+  return instruction->IsAdd() || instruction->IsSub();
+}
+
 }  // namespace helpers
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 02e5dab..67ff87a 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -165,6 +165,7 @@
       if (!inst->HasSideEffects()
           && !inst->CanThrow()
           && !inst->IsSuspendCheck()
+          && !inst->IsNativeDebugInfo()
           // If we added an explicit barrier then we should keep it.
           && !inst->IsMemoryBarrier()
           && !inst->IsParameterValue()
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
new file mode 100644
index 0000000..6582063
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_cache_array_fixups_arm.h"
+
+#include "base/arena_containers.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
+
+namespace art {
+namespace arm {
+
+/**
+ * Finds instructions that need the dex cache arrays base as an input.
+ */
+class DexCacheArrayFixupsVisitor : public HGraphVisitor {
+ public:
+  explicit DexCacheArrayFixupsVisitor(HGraph* graph)
+      : HGraphVisitor(graph),
+        dex_cache_array_bases_(std::less<const DexFile*>(),
+                               // Attribute memory use to code generator.
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
+
+  void MoveBasesIfNeeded() {
+    for (const auto& entry : dex_cache_array_bases_) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      HArmDexCacheArraysBase* base = entry.second;
+      base->MoveBeforeFirstUserAndOutOfLoops();
+    }
+  }
+
+ private:
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    // If this is an invoke with PC-relative access to the dex cache methods array,
+    // we need to add the dex cache arrays base as the special input.
+    if (invoke->HasPcRelativeDexCache()) {
+      // Initialize base for target method dex file if needed.
+      MethodReference target_method = invoke->GetTargetMethod();
+      HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kArmPointerSize, target_method.dex_file);
+      base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index));
+      // Add the special argument base to the method.
+      DCHECK(!invoke->HasCurrentMethodInput());
+      invoke->AddSpecialInput(base);
+    }
+  }
+
+  HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) {
+    // Ensure we only initialize the pointer once for each dex file.
+    auto lb = dex_cache_array_bases_.lower_bound(&dex_file);
+    if (lb != dex_cache_array_bases_.end() &&
+        !dex_cache_array_bases_.key_comp()(&dex_file, lb->first)) {
+      return lb->second;
+    }
+
+    // Insert the base at the start of the entry block; it is moved to a better
+    // position later, in MoveBasesIfNeeded().
+    HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file);
+    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+    entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction());
+    dex_cache_array_bases_.PutBefore(lb, &dex_file, base);
+    return base;
+  }
+
+  using DexCacheArraysBaseMap =
+      ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>;
+  DexCacheArraysBaseMap dex_cache_array_bases_;
+};
+
+void DexCacheArrayFixups::Run() {
+  DexCacheArrayFixupsVisitor visitor(graph_);
+  visitor.VisitInsertionOrder();
+  visitor.MoveBasesIfNeeded();
+}
+
+}  // namespace arm
+}  // namespace art
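
GetOrCreateDexCacheArrayBase above is the classic sorted-map lookup-with-hint
idiom: one lower_bound(), an equality check through key_comp(), and a hinted
insert, so each dex file pays for a single tree search. A minimal standalone
sketch of the same idiom with std::map (ArenaSafeMap::PutBefore is ART's hinted
insert; emplace_hint is the standard-library equivalent):

    #include <map>
    #include <string>

    int* GetOrCreate(std::map<std::string, int*>* bases, const std::string& key) {
      auto lb = bases->lower_bound(key);
      if (lb != bases->end() && !bases->key_comp()(key, lb->first)) {
        return lb->second;  // Already present: the lower_bound was the only search.
      }
      int* value = new int(0);              // Stand-in for creating the base.
      bases->emplace_hint(lb, key, value);  // Amortized O(1) with a valid hint.
      return value;
    }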
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h
new file mode 100644
index 0000000..015f910
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
+#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+namespace arm {
+
+class DexCacheArrayFixups : public HOptimization {
+ public:
+  DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats)
+      : HOptimization(graph, "dex_cache_array_fixups_arm", stats) {}
+
+  void Run() OVERRIDE;
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 5814d75..dfc363f 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -735,26 +735,31 @@
     }
   }
 
-  // Test phi equivalents. There should not be two of the same type and they
-  // should only be created for constants which were untyped in DEX.
-  for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
-    HPhi* other_phi = phi_it.Current()->AsPhi();
-    if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) {
-      if (phi->GetType() == other_phi->GetType()) {
-        std::stringstream type_str;
-        type_str << phi->GetType();
-        AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.",
-                              phi->GetId(),
-                              phi->GetRegNumber(),
-                              type_str.str().c_str()));
-      } else {
-        ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true);
-        if (!IsConstantEquivalent(phi, other_phi, &visited)) {
-          AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they "
-                                "are not equivalents of constants.",
+  // Test phi equivalents. There should not be two of the same type and they should only be
+  // created for constants which were untyped in DEX. Note that this test can be skipped for
+  // a synthetic phi (indicated by lack of a virtual register).
+  if (phi->GetRegNumber() != kNoRegNumber) {
+    for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis());
+         !phi_it.Done();
+         phi_it.Advance()) {
+      HPhi* other_phi = phi_it.Current()->AsPhi();
+      if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) {
+        if (phi->GetType() == other_phi->GetType()) {
+          std::stringstream type_str;
+          type_str << phi->GetType();
+          AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.",
                                 phi->GetId(),
-                                other_phi->GetId(),
-                                phi->GetRegNumber()));
+                                phi->GetRegNumber(),
+                                type_str.str().c_str()));
+        } else {
+          ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true);
+          if (!IsConstantEquivalent(phi, other_phi, &visited)) {
+            AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they "
+                                  "are not equivalents of constants.",
+                                  phi->GetId(),
+                                  other_phi->GetId(),
+                                  phi->GetRegNumber()));
+          }
         }
       }
     }
@@ -845,7 +850,7 @@
 
 void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) {
   VisitInstruction(op);
-  if (op->IsUShr() || op->IsShr() || op->IsShl()) {
+  if (op->IsUShr() || op->IsShr() || op->IsShl() || op->IsRor()) {
     if (PrimitiveKind(op->InputAt(1)->GetType()) != Primitive::kPrimInt) {
       AddError(StringPrintf(
           "Shift operation %s %d has a non-int kind second input: "
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index af3ecb1..e9fdb84 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -393,15 +393,18 @@
 
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
     VisitInvoke(invoke);
-    StartAttributeStream("recursive") << std::boolalpha
-                                      << invoke->IsRecursive()
-                                      << std::noboolalpha;
+    StartAttributeStream("method_load_kind") << invoke->GetMethodLoadKind();
     StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
     if (invoke->IsStatic()) {
       StartAttributeStream("clinit_check") << invoke->GetClinitCheckRequirement();
     }
   }
 
+  void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
+    VisitInvoke(invoke);
+    StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
+  }
+
   void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE {
     StartAttributeStream("field_type") << field_access->GetFieldType();
   }
@@ -422,6 +425,19 @@
     StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit");
   }
 
+#ifdef ART_ENABLE_CODEGEN_arm64
+  void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE {
+    StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind();
+    if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) {
+      StartAttributeStream("shift") << instruction->GetShiftAmount();
+    }
+  }
+
+  void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE {
+    StartAttributeStream("kind") << instruction->GetOpKind();
+  }
+#endif
+
   bool IsPass(const char* name) {
     return strcmp(pass_name_, name) == 0;
   }
@@ -503,6 +519,18 @@
         StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
       } else if (instruction->IsLoadClass()) {
         StartAttributeStream("klass") << "unresolved";
+      } else if (instruction->IsNullConstant()) {
+        // The NullConstant may be added to the graph during other passes that happen between
+        // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner
+        // doesn't run or doesn't inline anything, the NullConstant remains untyped.
+        // So we should check NullConstants for validity only after reference type propagation.
+        //
+        // Note: The infrastructure to properly type NullConstants everywhere is too complex
+        // to add for the benefit it would bring.
+        StartAttributeStream("klass") << "not_set";
+        DCHECK(!is_after_pass_
+            || !IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName))
+            << " Expected a valid rti after reference type propagation";
       } else {
         DCHECK(!is_after_pass_)
             << "Expected a valid rti after reference type propagation";
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index c36de84..4af111b 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -377,9 +377,10 @@
 
   HInstruction* current = block->GetFirstInstruction();
   while (current != nullptr) {
-    set->Kill(current->GetSideEffects());
     // Save the next instruction in case `current` is removed from the graph.
     HInstruction* next = current->GetNext();
+    // Do not kill the set with the instruction's side effects just yet: if the
+    // instruction is GVN'ed and removed, its side effects never occur, so there
+    // is nothing to kill.
     if (current->CanBeMoved()) {
       if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) {
         // For commutative ops, (x op y) will be treated the same as (y op x)
@@ -395,8 +396,11 @@
         current->ReplaceWith(existing);
         current->GetBlock()->RemoveInstruction(current);
       } else {
+        set->Kill(current->GetSideEffects());
         set->Add(current);
       }
+    } else {
+      set->Kill(current->GetSideEffects());
     }
     current = next;
   }
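
Read together, the two gvn.cc hunks defer the side-effect kill until it is known
that `current` survives: on a GVN hit the duplicate is removed, its side effects
never occur, and the value set keeps entries that the old kill-first order would
have discarded. A consolidated sketch of the resulting loop (commutative-operand
handling elided; `Lookup` is the ValueSet query that produces `existing`):

    HInstruction* current = block->GetFirstInstruction();
    while (current != nullptr) {
      HInstruction* next = current->GetNext();  // `current` may be removed below.
      if (current->CanBeMoved()) {
        HInstruction* existing = set->Lookup(current);
        if (existing != nullptr) {
          current->ReplaceWith(existing);                // GVN hit: reuse, no kill.
          current->GetBlock()->RemoveInstruction(current);
        } else {
          set->Kill(current->GetSideEffects());          // Survivor: kill, then add.
          set->Add(current);
        }
      } else {
        set->Kill(current->GetSideEffects());
      }
      current = next;
    }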
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index de60cf2..78cb7d4 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -28,7 +28,7 @@
 TEST(GVNTest, LocalFieldElimination) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
 
   HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
@@ -113,7 +113,7 @@
 TEST(GVNTest, GlobalFieldElimination) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
 
   HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
@@ -196,7 +196,7 @@
 TEST(GVNTest, LoopFieldElimination) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
 
   HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
@@ -319,7 +319,7 @@
 TEST(GVNTest, LoopSideEffects) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
 
   static const SideEffects kCanTriggerGC = SideEffects::CanTriggerGC();
 
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index fdf8cc9..19e6cbd 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -71,10 +71,10 @@
 }
 
 void HInductionVarAnalysis::Run() {
-  // Detects sequence variables (generalized induction variables) during an inner-loop-first
-  // traversal of all loops using Gerlek's algorithm. The order is only relevant if outer
-  // loops would use induction information of inner loops (not currently done).
-  for (HPostOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) {
+  // Detects sequence variables (generalized induction variables) during an outer-to-inner
+  // traversal of all loops using Gerlek's algorithm. The order is important because it
+  // enables range analysis on an outer loop while visiting its inner loops.
+  for (HReversePostOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) {
     HBasicBlock* graph_block = it_graph.Current();
     if (graph_block->IsLoopHeader()) {
       VisitLoop(graph_block->GetLoopInformation());
@@ -705,7 +705,8 @@
       return loop_it->second;
     }
   }
-  if (loop->IsLoopInvariant(instruction, true)) {
+  if (loop->IsDefinedOutOfTheLoop(instruction)) {
+    DCHECK(instruction->GetBlock()->Dominates(loop->GetPreHeader()));
     InductionInfo* info = CreateInvariantFetch(instruction);
     AssignInfo(loop, instruction, info);
     return info;
@@ -744,8 +745,7 @@
       if (value == 1) {
         return b;
       } else if (value == -1) {
-        op = kNeg;
-        a = nullptr;
+        return CreateSimplifiedInvariant(kNeg, nullptr, b);
       }
     }
   }
@@ -762,26 +762,52 @@
       if (value == 1) {
         return a;
       } else if (value == -1) {
-        op = kNeg;
-        b = a;
-        a = nullptr;
+        return CreateSimplifiedInvariant(kNeg, nullptr, a);
       }
     }
   } else if (b->operation == kNeg) {
     // Simplify a + (-b) = a - b, a - (-b) = a + b, -(-b) = b.
     if (op == kAdd) {
-      op = kSub;
-      b = b->op_b;
+      return CreateSimplifiedInvariant(kSub, a, b->op_b);
     } else if (op == kSub) {
-      op = kAdd;
-      b = b->op_b;
+      return CreateSimplifiedInvariant(kAdd, a, b->op_b);
     } else if (op == kNeg) {
       return b->op_b;
     }
+  } else if (b->operation == kSub) {
+    // Simplify -(a - b) = b - a.
+    if (op == kNeg) {
+      return CreateSimplifiedInvariant(kSub, b->op_b, b->op_a);
+    }
   }
   return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr);
 }
 
+bool HInductionVarAnalysis::IsIntAndGet(InductionInfo* info, int64_t* value) {
+  if (info != nullptr && info->induction_class == kInvariant) {
+    // A direct constant fetch.
+    if (info->operation == kFetch) {
+      DCHECK(info->fetch);
+      if (info->fetch->IsIntConstant()) {
+        *value = info->fetch->AsIntConstant()->GetValue();
+        return true;
+      } else if (info->fetch->IsLongConstant()) {
+        *value = info->fetch->AsLongConstant()->GetValue();
+        return true;
+      }
+    }
+    // Use range analysis to resolve compound values.
+    InductionVarRange range(this);
+    int32_t min_val = 0;
+    int32_t max_val = 0;
+    if (range.IsConstantRange(info, &min_val, &max_val) && min_val == max_val) {
+      *value = min_val;
+      return true;
+    }
+  }
+  return false;
+}
+
 bool HInductionVarAnalysis::InductionEqual(InductionInfo* info1,
                                            InductionInfo* info2) {
   // Test structural equality only, without accounting for simplifications.
@@ -797,33 +823,9 @@
   return info1 == info2;
 }
 
-bool HInductionVarAnalysis::IsIntAndGet(InductionInfo* info, int64_t* value) {
-  if (info != nullptr && info->induction_class == kInvariant) {
-    // A direct constant fetch.
-    if (info->operation == kFetch) {
-      DCHECK(info->fetch);
-      if (info->fetch->IsIntConstant()) {
-        *value = info->fetch->AsIntConstant()->GetValue();
-        return true;
-      } else if (info->fetch->IsLongConstant()) {
-        *value = info->fetch->AsLongConstant()->GetValue();
-        return true;
-      }
-    }
-    // Use range analysis to resolve compound values.
-    int32_t range_value;
-    if (InductionVarRange::GetConstant(info, &range_value)) {
-      *value = range_value;
-      return true;
-    }
-  }
-  return false;
-}
-
 std::string HInductionVarAnalysis::InductionToString(InductionInfo* info) {
   if (info != nullptr) {
     if (info->induction_class == kInvariant) {
-      int64_t value = -1;
       std::string inv = "(";
       inv += InductionToString(info->op_a);
       switch (info->operation) {
@@ -839,8 +841,10 @@
         case kGE:    inv += " >= "; break;
         case kFetch:
           DCHECK(info->fetch);
-          if (IsIntAndGet(info, &value)) {
-            inv += std::to_string(value);
+          if (info->fetch->IsIntConstant()) {
+            inv += std::to_string(info->fetch->AsIntConstant()->GetValue());
+          } else if (info->fetch->IsLongConstant()) {
+            inv += std::to_string(info->fetch->AsLongConstant()->GetValue());
           } else {
             inv += std::to_string(info->fetch->GetId()) + ":" + info->fetch->DebugName();
           }
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index cf35409..84d5d82 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -188,9 +188,11 @@
   InductionInfo* CreateConstant(int64_t value, Primitive::Type type);
   InductionInfo* CreateSimplifiedInvariant(InductionOp op, InductionInfo* a, InductionInfo* b);
 
+  // Constants.
+  bool IsIntAndGet(InductionInfo* info, int64_t* value);
+
   // Helpers.
   static bool InductionEqual(InductionInfo* info1, InductionInfo* info2);
-  static bool IsIntAndGet(InductionInfo* info, int64_t* value);
   static std::string InductionToString(InductionInfo* info);
 
   // TODO: fine tune the following data structures, only keep relevant data.
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index b40ef5a..ae15fcf 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -119,6 +119,17 @@
   }
 }
 
+bool InductionVarRange::RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) const {
+  Value v1 = RefineOuter(*min_val, /* is_min */ true);
+  Value v2 = RefineOuter(*max_val, /* is_min */ false);
+  if (v1.instruction != min_val->instruction || v2.instruction != max_val->instruction) {
+    *min_val = v1;
+    *max_val = v2;
+    return true;
+  }
+  return false;
+}
+
 bool InductionVarRange::CanGenerateCode(HInstruction* context,
                                         HInstruction* instruction,
                                         /*out*/bool* needs_finite_test,
@@ -156,7 +167,7 @@
 // Private class methods.
 //
 
-bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) {
+bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const {
   if (info != nullptr) {
     if (info->induction_class == HInductionVarAnalysis::kLinear) {
       return true;
@@ -167,7 +178,7 @@
   return false;
 }
 
-bool InductionVarRange::IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+bool InductionVarRange::IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) const {
   if (trip != nullptr) {
     if (trip->induction_class == HInductionVarAnalysis::kInvariant) {
       return trip->operation == HInductionVarAnalysis::kTripCountInBody ||
@@ -177,7 +188,7 @@
   return false;
 }
 
-bool InductionVarRange::IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+bool InductionVarRange::IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) const {
   if (trip != nullptr) {
     if (trip->induction_class == HInductionVarAnalysis::kInvariant) {
       return trip->operation == HInductionVarAnalysis::kTripCountInBodyUnsafe ||
@@ -187,10 +198,57 @@
   return false;
 }
 
+InductionVarRange::Value InductionVarRange::GetLinear(HInductionVarAnalysis::InductionInfo* info,
+                                                      HInductionVarAnalysis::InductionInfo* trip,
+                                                      bool in_body,
+                                                      bool is_min) const {
+  // Detect the common situation where an offset inside the trip count cancels out during range
+  // analysis (finding max a * (TC - 1) + OFFSET for a == 1 and TC = UPPER - OFFSET, or finding
+  // min a * (TC - 1) + OFFSET for a == -1 and TC = OFFSET - UPPER), so that no information is
+  // lost to intermediate results that only incorporate single instructions.
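+  // Worked instance for a == 1 (a == -1 is symmetric): with TC = UPPER - OFFSET,
+  //   max over 0 <= i < TC of (i + OFFSET) = (TC - 1) + OFFSET
+  //                                        = (UPPER - OFFSET - 1) + OFFSET
+  //                                        = UPPER - 1,
+  // which only falls out when the subtraction cancels symbolically, not when
+  // TC and OFFSET are bounded independently.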
+  if (trip != nullptr) {
+    HInductionVarAnalysis::InductionInfo* trip_expr = trip->op_a;
+    if (trip_expr->operation == HInductionVarAnalysis::kSub) {
+      int32_t min_value = 0;
+      int32_t stride_value = 0;
+      if (IsConstantRange(info->op_a, &min_value, &stride_value) && min_value == stride_value) {
+        if (!is_min && stride_value == 1) {
+          // Test original trip's negative operand (trip_expr->op_b) against
+          // the offset of the linear induction.
+          if (HInductionVarAnalysis::InductionEqual(trip_expr->op_b, info->op_b)) {
+            // Analyze cancelled trip with just the positive operand (trip_expr->op_a).
+            HInductionVarAnalysis::InductionInfo cancelled_trip(
+                trip->induction_class, trip->operation, trip_expr->op_a, trip->op_b, nullptr);
+            return GetVal(&cancelled_trip, trip, in_body, is_min);
+          }
+        } else if (is_min && stride_value == -1) {
+          // Test original trip's positive operand (trip_expr->op_a) against
+          // the offset of the linear induction.
+          if (HInductionVarAnalysis::InductionEqual(trip_expr->op_a, info->op_b)) {
+            // Analyze cancelled trip with just the negative operand (trip_expr->op_b).
+            HInductionVarAnalysis::InductionInfo neg(
+                HInductionVarAnalysis::kInvariant,
+                HInductionVarAnalysis::kNeg,
+                nullptr,
+                trip_expr->op_b,
+                nullptr);
+            HInductionVarAnalysis::InductionInfo cancelled_trip(
+                trip->induction_class, trip->operation, &neg, trip->op_b, nullptr);
+            return SubValue(Value(0), GetVal(&cancelled_trip, trip, in_body, !is_min));
+          }
+        }
+      }
+    }
+  }
+  // General rule of linear induction a * i + b, for normalized 0 <= i < TC.
+  return AddValue(GetMul(info->op_a, trip, trip, in_body, is_min),
+                  GetVal(info->op_b, trip, in_body, is_min));
+}
+
 InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction,
                                                      HInductionVarAnalysis::InductionInfo* trip,
                                                      bool in_body,
-                                                     bool is_min) {
+                                                     bool is_min) const {
   // Detect constants and chase the fetch a bit deeper into the HIR tree, so that it becomes
   // more likely range analysis will compare the same instructions as terminal nodes.
   int32_t value;
@@ -202,6 +260,8 @@
     } else if (IsIntAndGet(instruction->InputAt(1), &value)) {
       return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), Value(value));
     }
+  } else if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) {
+    return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min);
   } else if (is_min) {
     // Special case for finding minimum: minimum of trip-count in loop-body is 1.
     if (trip != nullptr && in_body && instruction == trip->op_a->fetch) {
@@ -214,7 +274,7 @@
 InductionVarRange::Value InductionVarRange::GetVal(HInductionVarAnalysis::InductionInfo* info,
                                                    HInductionVarAnalysis::InductionInfo* trip,
                                                    bool in_body,
-                                                   bool is_min) {
+                                                   bool is_min) const {
   if (info != nullptr) {
     switch (info->induction_class) {
       case HInductionVarAnalysis::kInvariant:
@@ -253,13 +313,11 @@
             break;
         }
         break;
-      case HInductionVarAnalysis::kLinear:
-        // Linear induction a * i + b, for normalized 0 <= i < TC.
-        return AddValue(GetMul(info->op_a, trip, trip, in_body, is_min),
-                        GetVal(info->op_b, trip, in_body, is_min));
+      case HInductionVarAnalysis::kLinear: {
+        return GetLinear(info, trip, in_body, is_min);
+      }
       case HInductionVarAnalysis::kWrapAround:
       case HInductionVarAnalysis::kPeriodic:
-        // Merge values in the wrap-around/periodic.
         return MergeVal(GetVal(info->op_a, trip, in_body, is_min),
                         GetVal(info->op_b, trip, in_body, is_min), is_min);
     }
@@ -271,11 +329,17 @@
                                                    HInductionVarAnalysis::InductionInfo* info2,
                                                    HInductionVarAnalysis::InductionInfo* trip,
                                                    bool in_body,
-                                                   bool is_min) {
+                                                   bool is_min) const {
   Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true);
   Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false);
   Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true);
   Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false);
+  // Try to refine a symbolic first operand, which would otherwise make the multiplication fail.
+  if (v1_min.a_constant && v1_max.a_constant) {
+    v1_min = RefineOuter(v1_min, /* is_min */ true);
+    v1_max = RefineOuter(v1_max, /* is_min */ false);
+  }
+  // Positive or negative range?
   if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) {
     // Positive range vs. positive or negative range.
     if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
@@ -285,7 +349,7 @@
       return is_min ? MulValue(v1_max, v2_min)
                     : MulValue(v1_min, v2_max);
     }
-  } else if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant <= 0) {
+  } else if (v1_max.is_known && v1_max.a_constant == 0 && v1_max.b_constant <= 0) {
     // Negative range vs. positive or negative range.
     if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
       return is_min ? MulValue(v1_min, v2_max)
@@ -302,11 +366,12 @@
                                                    HInductionVarAnalysis::InductionInfo* info2,
                                                    HInductionVarAnalysis::InductionInfo* trip,
                                                    bool in_body,
-                                                   bool is_min) {
+                                                   bool is_min) const {
   Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true);
   Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false);
   Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true);
   Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false);
+  // Positive or negative range?
   if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) {
     // Positive range vs. positive or negative range.
     if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
@@ -316,7 +381,7 @@
       return is_min ? DivValue(v1_max, v2_max)
                     : DivValue(v1_min, v2_min);
     }
-  } else if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant <= 0) {
+  } else if (v1_max.is_known && v1_max.a_constant == 0 && v1_max.b_constant <= 0) {
     // Negative range vs. positive or negative range.
     if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
       return is_min ? DivValue(v1_min, v2_min)
@@ -329,19 +394,23 @@
   return Value();
 }
 
-bool InductionVarRange::GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t *value) {
-  Value v_min = GetVal(info, nullptr, false, /* is_min */ true);
-  Value v_max = GetVal(info, nullptr, false, /* is_min */ false);
-  if (v_min.is_known && v_max.is_known) {
-    if (v_min.a_constant == 0 && v_max.a_constant == 0 && v_min.b_constant == v_max.b_constant) {
-      *value = v_min.b_constant;
+bool InductionVarRange::IsConstantRange(HInductionVarAnalysis::InductionInfo* info,
+                                        int32_t *min_value,
+                                        int32_t *max_value) const {
+  bool in_body = true;  // no known trip count
+  Value v_min = GetVal(info, nullptr, in_body, /* is_min */ true);
+  Value v_max = GetVal(info, nullptr, in_body, /* is_min */ false);
+  do {
+    if (v_min.is_known && v_min.a_constant == 0 && v_max.is_known && v_max.a_constant == 0) {
+      *min_value = v_min.b_constant;
+      *max_value = v_max.b_constant;
       return true;
     }
-  }
+  } while (RefineOuter(&v_min, &v_max));
   return false;
 }
 
-InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) {
+InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) const {
   if (v1.is_known && v2.is_known && IsSafeAdd(v1.b_constant, v2.b_constant)) {
     const int32_t b = v1.b_constant + v2.b_constant;
     if (v1.a_constant == 0) {
@@ -355,7 +424,7 @@
   return Value();
 }
 
-InductionVarRange::Value InductionVarRange::SubValue(Value v1, Value v2) {
+InductionVarRange::Value InductionVarRange::SubValue(Value v1, Value v2) const {
   if (v1.is_known && v2.is_known && IsSafeSub(v1.b_constant, v2.b_constant)) {
     const int32_t b = v1.b_constant - v2.b_constant;
     if (v1.a_constant == 0 && IsSafeSub(0, v2.a_constant)) {
@@ -369,7 +438,7 @@
   return Value();
 }
 
-InductionVarRange::Value InductionVarRange::MulValue(Value v1, Value v2) {
+InductionVarRange::Value InductionVarRange::MulValue(Value v1, Value v2) const {
   if (v1.is_known && v2.is_known) {
     if (v1.a_constant == 0) {
       if (IsSafeMul(v1.b_constant, v2.a_constant) && IsSafeMul(v1.b_constant, v2.b_constant)) {
@@ -384,7 +453,7 @@
   return Value();
 }
 
-InductionVarRange::Value InductionVarRange::DivValue(Value v1, Value v2) {
+InductionVarRange::Value InductionVarRange::DivValue(Value v1, Value v2) const {
   if (v1.is_known && v2.is_known && v1.a_constant == 0 && v2.a_constant == 0) {
     if (IsSafeDiv(v1.b_constant, v2.b_constant)) {
       return Value(v1.b_constant / v2.b_constant);
@@ -393,7 +462,7 @@
   return Value();
 }
 
-InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is_min) {
+InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is_min) const {
   if (v1.is_known && v2.is_known) {
     if (v1.instruction == v2.instruction && v1.a_constant == v2.a_constant) {
       return Value(v1.instruction, v1.a_constant,
@@ -404,6 +473,25 @@
   return Value();
 }
 
+InductionVarRange::Value InductionVarRange::RefineOuter(Value v, bool is_min) const {
+  if (v.instruction != nullptr) {
+    HLoopInformation* loop =
+        v.instruction->GetBlock()->GetLoopInformation();  // closest enveloping loop
+    if (loop != nullptr) {
+      // Set up loop information.
+      bool in_body = true;  // use is always in body of outer loop
+      HInductionVarAnalysis::InductionInfo* info =
+          induction_analysis_->LookupInfo(loop, v.instruction);
+      HInductionVarAnalysis::InductionInfo* trip =
+          induction_analysis_->LookupInfo(loop, loop->GetHeader()->GetLastInstruction());
+      // Try to refine "a x instruction + b" with outer loop range information on instruction.
+      return AddValue(MulValue(Value(v.a_constant), GetVal(info, trip, in_body, is_min)),
+                      Value(v.b_constant));
+    }
+  }
+  return v;
+}
+
 bool InductionVarRange::GenerateCode(HInstruction* context,
                                      HInstruction* instruction,
                                      HGraph* graph,
@@ -412,7 +500,7 @@
                                      /*out*/HInstruction** upper,
                                      /*out*/HInstruction** taken_test,
                                      /*out*/bool* needs_finite_test,
-                                     /*out*/bool* needs_taken_test) {
+                                     /*out*/bool* needs_taken_test) const {
   HLoopInformation* loop = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
   if (loop != nullptr) {
     // Set up loop information.
@@ -425,9 +513,13 @@
     }
     HInductionVarAnalysis::InductionInfo* trip =
         induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
-    // Determine what tests are needed.
+    // Determine what tests are needed. A finite test is needed if the evaluation code uses the
+    // trip-count and the loop may be unsafe (because in such cases, the index could "overshoot"
+    // the computed range). A taken test is needed for any unknown trip-count, even if the
+    // evaluation code does not use the trip-count explicitly (since there could be an implicit
+    // relation between e.g. an invariant subscript and a not-taken condition).
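+    // For instance, with an invariant subscript a[j] inside "for (i = 0; i < n; i++)",
+    // the range computed for j never mentions the trip-count, yet hoisting j's bounds
+    // check above the loop is only sound when the loop is actually taken (n > 0).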
     *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
-    *needs_taken_test = NeedsTripCount(info) && IsBodyTripCount(trip);
+    *needs_taken_test = IsBodyTripCount(trip);
     // Code generation for taken test: generate the code when requested or otherwise analyze
     // if code generation is feasible when taken test is needed.
     if (taken_test != nullptr) {
@@ -456,7 +548,7 @@
                                      HBasicBlock* block,
                                      /*out*/HInstruction** result,
                                      bool in_body,
-                                     bool is_min) {
+                                     bool is_min) const {
   if (info != nullptr) {
     // Handle current operation.
     Primitive::Type type = Primitive::kPrimInt;
@@ -512,10 +604,13 @@
             }
             break;
           case HInductionVarAnalysis::kFetch:
-            if (graph != nullptr) {
-              *result = info->fetch;  // already in HIR
+            if (info->fetch->GetType() == type) {
+              if (graph != nullptr) {
+                *result = info->fetch;  // already in HIR
+              }
+              return true;
             }
-            return true;
+            break;
           case HInductionVarAnalysis::kTripCountInLoop:
           case HInductionVarAnalysis::kTripCountInLoopUnsafe:
             if (!in_body && !is_min) {  // one extra!
@@ -545,29 +640,43 @@
         }
         break;
       case HInductionVarAnalysis::kLinear: {
-          // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only
-          // to avoid arithmetic wrap-around situations that are hard to guard against.
-          int32_t stride_value = 0;
-          if (GetConstant(info->op_a, &stride_value)) {
-            if (stride_value == 1 || stride_value == -1) {
-              const bool is_min_a = stride_value == 1 ? is_min : !is_min;
-              if (GenerateCode(trip,       trip, graph, block, &opa, in_body, is_min_a) &&
-                  GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
-                if (graph != nullptr) {
-                  HInstruction* oper;
-                  if (stride_value == 1) {
-                    oper = new (graph->GetArena()) HAdd(type, opa, opb);
-                  } else {
-                    oper = new (graph->GetArena()) HSub(type, opb, opa);
-                  }
-                  *result = Insert(block, oper);
+        // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only
+        // to avoid arithmetic wrap-around situations that are hard to guard against.
+        int32_t min_value = 0;
+        int32_t stride_value = 0;
+        if (IsConstantRange(info->op_a, &min_value, &stride_value) && min_value == stride_value) {
+          if (stride_value == 1 || stride_value == -1) {
+            const bool is_min_a = stride_value == 1 ? is_min : !is_min;
+            if (GenerateCode(trip,       trip, graph, block, &opa, in_body, is_min_a) &&
+                GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
+              if (graph != nullptr) {
+                HInstruction* oper;
+                if (stride_value == 1) {
+                  oper = new (graph->GetArena()) HAdd(type, opa, opb);
+                } else {
+                  oper = new (graph->GetArena()) HSub(type, opb, opa);
                 }
-                return true;
+                *result = Insert(block, oper);
               }
+              return true;
             }
           }
         }
         break;
+      }
+      case HInductionVarAnalysis::kWrapAround:
+      case HInductionVarAnalysis::kPeriodic: {
+        // Wrap-around and periodic inductions are restricted to constants only, so that extreme
+        // values are easy to test at runtime without complications of arithmetic wrap-around.
+        Value extreme = GetVal(info, trip, in_body, is_min);
+        if (extreme.is_known && extreme.a_constant == 0) {
+          if (graph != nullptr) {
+            *result = graph->GetIntConstant(extreme.b_constant);
+          }
+          return true;
+        }
+        break;
+      }
       default:
         break;
     }
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 7984871..974b8fb 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -68,6 +68,9 @@
                          /*out*/Value* max_val,
                          /*out*/bool* needs_finite_test);
 
+  /** Refines the values with the induction of the next outer loop. Returns true on change. */
+  bool RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) const;
+
   /**
    * Returns true if range analysis is able to generate code for the lower and upper
    * bound expressions on the instruction in the given context. The need_finite_test
@@ -113,40 +116,48 @@
                          /*out*/HInstruction** taken_test);
 
  private:
-  //
-  // Private helper methods.
-  //
+  bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const;
+  bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) const;
+  bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) const;
 
-  static bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info);
-  static bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip);
-  static bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip);
+  Value GetLinear(HInductionVarAnalysis::InductionInfo* info,
+                  HInductionVarAnalysis::InductionInfo* trip,
+                  bool in_body,
+                  bool is_min) const;
+  Value GetFetch(HInstruction* instruction,
+                 HInductionVarAnalysis::InductionInfo* trip,
+                 bool in_body,
+                 bool is_min) const;
+  Value GetVal(HInductionVarAnalysis::InductionInfo* info,
+               HInductionVarAnalysis::InductionInfo* trip,
+               bool in_body,
+               bool is_min) const;
+  Value GetMul(HInductionVarAnalysis::InductionInfo* info1,
+               HInductionVarAnalysis::InductionInfo* info2,
+               HInductionVarAnalysis::InductionInfo* trip,
+               bool in_body,
+               bool is_min) const;
+  Value GetDiv(HInductionVarAnalysis::InductionInfo* info1,
+               HInductionVarAnalysis::InductionInfo* info2,
+               HInductionVarAnalysis::InductionInfo* trip,
+               bool in_body,
+               bool is_min) const;
 
-  static Value GetFetch(HInstruction* instruction,
-                        HInductionVarAnalysis::InductionInfo* trip,
-                        bool in_body,
-                        bool is_min);
-  static Value GetVal(HInductionVarAnalysis::InductionInfo* info,
-                      HInductionVarAnalysis::InductionInfo* trip,
-                      bool in_body,
-                      bool is_min);
-  static Value GetMul(HInductionVarAnalysis::InductionInfo* info1,
-                      HInductionVarAnalysis::InductionInfo* info2,
-                      HInductionVarAnalysis::InductionInfo* trip,
-                      bool in_body,
-                      bool is_min);
-  static Value GetDiv(HInductionVarAnalysis::InductionInfo* info1,
-                      HInductionVarAnalysis::InductionInfo* info2,
-                      HInductionVarAnalysis::InductionInfo* trip,
-                      bool in_body,
-                      bool is_min);
+  bool IsConstantRange(HInductionVarAnalysis::InductionInfo* info,
+                       int32_t *min_value,
+                       int32_t *max_value) const;
 
-  static bool GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t *value);
+  Value AddValue(Value v1, Value v2) const;
+  Value SubValue(Value v1, Value v2) const;
+  Value MulValue(Value v1, Value v2) const;
+  Value DivValue(Value v1, Value v2) const;
+  Value MergeVal(Value v1, Value v2, bool is_min) const;
 
-  static Value AddValue(Value v1, Value v2);
-  static Value SubValue(Value v1, Value v2);
-  static Value MulValue(Value v1, Value v2);
-  static Value DivValue(Value v1, Value v2);
-  static Value MergeVal(Value v1, Value v2, bool is_min);
+  /**
+   * Returns the refined value using the induction of the next outer loop, or the
+   * input value if no further refinement is possible.
+   */
+  Value RefineOuter(Value val, bool is_min) const;
 
   /**
    * Generates code for lower/upper/taken-test in the HIR. Returns true on success.
@@ -161,15 +172,15 @@
                     /*out*/HInstruction** upper,
                     /*out*/HInstruction** taken_test,
                     /*out*/bool* needs_finite_test,
-                    /*out*/bool* needs_taken_test);
+                    /*out*/bool* needs_taken_test) const;
 
-  static bool GenerateCode(HInductionVarAnalysis::InductionInfo* info,
-                           HInductionVarAnalysis::InductionInfo* trip,
-                           HGraph* graph,
-                           HBasicBlock* block,
-                           /*out*/HInstruction** result,
-                           bool in_body,
-                           bool is_min);
+  bool GenerateCode(HInductionVarAnalysis::InductionInfo* info,
+                    HInductionVarAnalysis::InductionInfo* trip,
+                    HGraph* graph,
+                    HBasicBlock* block,
+                    /*out*/HInstruction** result,
+                    bool in_body,
+                    bool is_min) const;
 
   /** Results of prior induction variable analysis. */
   HInductionVarAnalysis *induction_analysis_;
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index c2ba157..5c0bdd7 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -31,9 +31,12 @@
  */
 class InductionVarRangeTest : public testing::Test {
  public:
-  InductionVarRangeTest()  : pool_(), allocator_(&pool_) {
-    graph_ = CreateGraph(&allocator_);
-    iva_ = new (&allocator_) HInductionVarAnalysis(graph_);
+  InductionVarRangeTest()
+      : pool_(),
+        allocator_(&pool_),
+        graph_(CreateGraph(&allocator_)),
+        iva_(new (&allocator_) HInductionVarAnalysis(graph_)),
+        range_(iva_) {
     BuildGraph();
   }
 
@@ -59,6 +62,11 @@
     graph_->AddBlock(exit_block_);
     graph_->SetEntryBlock(entry_block_);
     graph_->SetExitBlock(exit_block_);
+    // Two parameters.
+    x_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
+    entry_block_->AddInstruction(x_);
+    y_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
+    entry_block_->AddInstruction(y_);
   }
 
   /** Constructs loop with given upper bound. */
@@ -103,7 +111,7 @@
     exit_block_->AddInstruction(new (&allocator_) HExit());
   }
 
-  /** Performs induction variable analysis. */
+  /** Constructs SSA and performs induction variable analysis. */
   void PerformInductionVarAnalysis() {
     ASSERT_TRUE(graph_->TryBuildingSsa());
     iva_->Run();
@@ -180,49 +188,51 @@
   //
 
   bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) {
-    return InductionVarRange::NeedsTripCount(info);
+    return range_.NeedsTripCount(info);
   }
 
   bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) {
-    return InductionVarRange::IsBodyTripCount(trip);
+    return range_.IsBodyTripCount(trip);
   }
 
   bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) {
-    return InductionVarRange::IsUnsafeTripCount(trip);
+    return range_.IsUnsafeTripCount(trip);
   }
 
   Value GetMin(HInductionVarAnalysis::InductionInfo* info,
                HInductionVarAnalysis::InductionInfo* induc) {
-    return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ true);
+    return range_.GetVal(info, induc, /* in_body */ true, /* is_min */ true);
   }
 
   Value GetMax(HInductionVarAnalysis::InductionInfo* info,
                HInductionVarAnalysis::InductionInfo* induc) {
-    return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ false);
+    return range_.GetVal(info, induc, /* in_body */ true, /* is_min */ false);
   }
 
   Value GetMul(HInductionVarAnalysis::InductionInfo* info1,
                HInductionVarAnalysis::InductionInfo* info2,
                bool is_min) {
-    return InductionVarRange::GetMul(info1, info2, nullptr, /* in_body */ true, is_min);
+    return range_.GetMul(info1, info2, nullptr, /* in_body */ true, is_min);
   }
 
   Value GetDiv(HInductionVarAnalysis::InductionInfo* info1,
                HInductionVarAnalysis::InductionInfo* info2,
                bool is_min) {
-    return InductionVarRange::GetDiv(info1, info2, nullptr, /* in_body */ true, is_min);
+    return range_.GetDiv(info1, info2, nullptr, /* in_body */ true, is_min);
   }
 
-  bool GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t* value) {
-    return InductionVarRange::GetConstant(info, value);
+  bool IsConstantRange(HInductionVarAnalysis::InductionInfo* info,
+                       int32_t* min_value,
+                       int32_t* max_value) {
+    return range_.IsConstantRange(info, min_value, max_value);
   }
 
-  Value AddValue(Value v1, Value v2) { return InductionVarRange::AddValue(v1, v2); }
-  Value SubValue(Value v1, Value v2) { return InductionVarRange::SubValue(v1, v2); }
-  Value MulValue(Value v1, Value v2) { return InductionVarRange::MulValue(v1, v2); }
-  Value DivValue(Value v1, Value v2) { return InductionVarRange::DivValue(v1, v2); }
-  Value MinValue(Value v1, Value v2) { return InductionVarRange::MergeVal(v1, v2, true); }
-  Value MaxValue(Value v1, Value v2) { return InductionVarRange::MergeVal(v1, v2, false); }
+  Value AddValue(Value v1, Value v2) { return range_.AddValue(v1, v2); }
+  Value SubValue(Value v1, Value v2) { return range_.SubValue(v1, v2); }
+  Value MulValue(Value v1, Value v2) { return range_.MulValue(v1, v2); }
+  Value DivValue(Value v1, Value v2) { return range_.DivValue(v1, v2); }
+  Value MinValue(Value v1, Value v2) { return range_.MergeVal(v1, v2, true); }
+  Value MaxValue(Value v1, Value v2) { return range_.MergeVal(v1, v2, false); }
 
   // General building fields.
   ArenaPool pool_;
@@ -232,16 +242,17 @@
   HBasicBlock* exit_block_;
   HBasicBlock* loop_preheader_;
   HInductionVarAnalysis* iva_;
+  InductionVarRange range_;
 
   // Instructions.
   HInstruction* condition_;
   HInstruction* increment_;
-  HReturnVoid x_;
-  HReturnVoid y_;
+  HInstruction* x_;
+  HInstruction* y_;
 };
 
 //
-// Tests on static methods.
+// Tests on private methods.
 //
 
 TEST_F(InductionVarRangeTest, TripCountProperties) {
@@ -274,14 +285,14 @@
               GetMin(CreateInvariant('+', CreateConst(2), CreateRange(10, 20)), nullptr));
   ExpectEqual(Value(22),
               GetMax(CreateInvariant('+', CreateConst(2), CreateRange(10, 20)), nullptr));
-  ExpectEqual(Value(&x_, 1, -20),
-              GetMin(CreateInvariant('+', CreateFetch(&x_), CreateRange(-20, -10)), nullptr));
-  ExpectEqual(Value(&x_, 1, -10),
-              GetMax(CreateInvariant('+', CreateFetch(&x_), CreateRange(-20, -10)), nullptr));
-  ExpectEqual(Value(&x_, 1, 10),
-              GetMin(CreateInvariant('+', CreateRange(10, 20), CreateFetch(&x_)), nullptr));
-  ExpectEqual(Value(&x_, 1, 20),
-              GetMax(CreateInvariant('+', CreateRange(10, 20), CreateFetch(&x_)), nullptr));
+  ExpectEqual(Value(x_, 1, -20),
+              GetMin(CreateInvariant('+', CreateFetch(x_), CreateRange(-20, -10)), nullptr));
+  ExpectEqual(Value(x_, 1, -10),
+              GetMax(CreateInvariant('+', CreateFetch(x_), CreateRange(-20, -10)), nullptr));
+  ExpectEqual(Value(x_, 1, 10),
+              GetMin(CreateInvariant('+', CreateRange(10, 20), CreateFetch(x_)), nullptr));
+  ExpectEqual(Value(x_, 1, 20),
+              GetMax(CreateInvariant('+', CreateRange(10, 20), CreateFetch(x_)), nullptr));
   ExpectEqual(Value(5),
               GetMin(CreateInvariant('+', CreateRange(-5, -1), CreateRange(10, 20)), nullptr));
   ExpectEqual(Value(19),
@@ -293,14 +304,14 @@
               GetMin(CreateInvariant('-', CreateConst(2), CreateRange(10, 20)), nullptr));
   ExpectEqual(Value(-8),
               GetMax(CreateInvariant('-', CreateConst(2), CreateRange(10, 20)), nullptr));
-  ExpectEqual(Value(&x_, 1, 10),
-              GetMin(CreateInvariant('-', CreateFetch(&x_), CreateRange(-20, -10)), nullptr));
-  ExpectEqual(Value(&x_, 1, 20),
-              GetMax(CreateInvariant('-', CreateFetch(&x_), CreateRange(-20, -10)), nullptr));
-  ExpectEqual(Value(&x_, -1, 10),
-              GetMin(CreateInvariant('-', CreateRange(10, 20), CreateFetch(&x_)), nullptr));
-  ExpectEqual(Value(&x_, -1, 20),
-              GetMax(CreateInvariant('-', CreateRange(10, 20), CreateFetch(&x_)), nullptr));
+  ExpectEqual(Value(x_, 1, 10),
+              GetMin(CreateInvariant('-', CreateFetch(x_), CreateRange(-20, -10)), nullptr));
+  ExpectEqual(Value(x_, 1, 20),
+              GetMax(CreateInvariant('-', CreateFetch(x_), CreateRange(-20, -10)), nullptr));
+  ExpectEqual(Value(x_, -1, 10),
+              GetMin(CreateInvariant('-', CreateRange(10, 20), CreateFetch(x_)), nullptr));
+  ExpectEqual(Value(x_, -1, 20),
+              GetMax(CreateInvariant('-', CreateRange(10, 20), CreateFetch(x_)), nullptr));
   ExpectEqual(Value(-25),
               GetMin(CreateInvariant('-', CreateRange(-5, -1), CreateRange(10, 20)), nullptr));
   ExpectEqual(Value(-11),
@@ -312,8 +323,8 @@
   ExpectEqual(Value(-10), GetMax(CreateInvariant('n', nullptr, CreateRange(10, 20)), nullptr));
   ExpectEqual(Value(10), GetMin(CreateInvariant('n', nullptr, CreateRange(-20, -10)), nullptr));
   ExpectEqual(Value(20), GetMax(CreateInvariant('n', nullptr, CreateRange(-20, -10)), nullptr));
-  ExpectEqual(Value(&x_, -1, 0), GetMin(CreateInvariant('n', nullptr, CreateFetch(&x_)), nullptr));
-  ExpectEqual(Value(&x_, -1, 0), GetMax(CreateInvariant('n', nullptr, CreateFetch(&x_)), nullptr));
+  ExpectEqual(Value(x_, -1, 0), GetMin(CreateInvariant('n', nullptr, CreateFetch(x_)), nullptr));
+  ExpectEqual(Value(x_, -1, 0), GetMax(CreateInvariant('n', nullptr, CreateFetch(x_)), nullptr));
 }
 
 TEST_F(InductionVarRangeTest, GetMinMaxMul) {
@@ -336,8 +347,8 @@
 }
 
 TEST_F(InductionVarRangeTest, GetMinMaxFetch) {
-  ExpectEqual(Value(&x_, 1, 0), GetMin(CreateFetch(&x_), nullptr));
-  ExpectEqual(Value(&x_, 1, 0), GetMax(CreateFetch(&x_), nullptr));
+  ExpectEqual(Value(x_, 1, 0), GetMin(CreateFetch(x_), nullptr));
+  ExpectEqual(Value(x_, 1, 0), GetMax(CreateFetch(x_), nullptr));
 }
 
 TEST_F(InductionVarRangeTest, GetMinMaxLinear) {
@@ -364,45 +375,70 @@
 TEST_F(InductionVarRangeTest, GetMulMin) {
   ExpectEqual(Value(6), GetMul(CreateRange(2, 10), CreateRange(3, 5), true));
   ExpectEqual(Value(-50), GetMul(CreateRange(2, 10), CreateRange(-5, -3), true));
+  ExpectEqual(Value(), GetMul(CreateRange(2, 10), CreateRange(-1, 1), true));
   ExpectEqual(Value(-50), GetMul(CreateRange(-10, -2), CreateRange(3, 5), true));
   ExpectEqual(Value(6), GetMul(CreateRange(-10, -2), CreateRange(-5, -3), true));
+  ExpectEqual(Value(), GetMul(CreateRange(-10, -2), CreateRange(-1, 1), true));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(2, 10), true));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(-10, -2), true));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(-1, 1), true));
 }
 
 TEST_F(InductionVarRangeTest, GetMulMax) {
   ExpectEqual(Value(50), GetMul(CreateRange(2, 10), CreateRange(3, 5), false));
   ExpectEqual(Value(-6), GetMul(CreateRange(2, 10), CreateRange(-5, -3), false));
+  ExpectEqual(Value(), GetMul(CreateRange(2, 10), CreateRange(-1, 1), false));
   ExpectEqual(Value(-6), GetMul(CreateRange(-10, -2), CreateRange(3, 5), false));
   ExpectEqual(Value(50), GetMul(CreateRange(-10, -2), CreateRange(-5, -3), false));
+  ExpectEqual(Value(), GetMul(CreateRange(-10, -2), CreateRange(-1, 1), false));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(2, 10), false));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(-10, -2), false));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(-1, 1), false));
 }
 
 TEST_F(InductionVarRangeTest, GetDivMin) {
   ExpectEqual(Value(10), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), true));
   ExpectEqual(Value(-500), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), true));
+  ExpectEqual(Value(), GetDiv(CreateRange(40, 1000), CreateRange(-1, 1), true));
   ExpectEqual(Value(-500), GetDiv(CreateRange(-1000, -40), CreateRange(2, 4), true));
   ExpectEqual(Value(10), GetDiv(CreateRange(-1000, -40), CreateRange(-4, -2), true));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1000, -40), CreateRange(-1, 1), true));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(40, 1000), true));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1000, -40), true));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1, 1), true));
 }
 
 TEST_F(InductionVarRangeTest, GetDivMax) {
   ExpectEqual(Value(500), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), false));
   ExpectEqual(Value(-10), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), false));
+  ExpectEqual(Value(), GetDiv(CreateRange(40, 1000), CreateRange(-1, 1), false));
   ExpectEqual(Value(-10), GetDiv(CreateRange(-1000, -40), CreateRange(2, 4), false));
   ExpectEqual(Value(500), GetDiv(CreateRange(-1000, -40), CreateRange(-4, -2), false));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1000, -40), CreateRange(-1, 1), false));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(40, 1000), false));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1000, -40), false));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1, 1), false));
 }
 
-TEST_F(InductionVarRangeTest, GetConstant) {
-  int32_t value;
-  ASSERT_TRUE(GetConstant(CreateConst(12345), &value));
-  EXPECT_EQ(12345, value);
-  EXPECT_FALSE(GetConstant(CreateRange(1, 2), &value));
+TEST_F(InductionVarRangeTest, IsConstantRange) {
+  int32_t min_value;
+  int32_t max_value;
+  ASSERT_TRUE(IsConstantRange(CreateConst(12345), &min_value, &max_value));
+  EXPECT_EQ(12345, min_value);
+  EXPECT_EQ(12345, max_value);
+  ASSERT_TRUE(IsConstantRange(CreateRange(1, 2), &min_value, &max_value));
+  EXPECT_EQ(1, min_value);
+  EXPECT_EQ(2, max_value);
+  EXPECT_FALSE(IsConstantRange(CreateFetch(x_), &min_value, &max_value));
 }
 
 TEST_F(InductionVarRangeTest, AddValue) {
   ExpectEqual(Value(110), AddValue(Value(10), Value(100)));
-  ExpectEqual(Value(-5), AddValue(Value(&x_, 1, -4), Value(&x_, -1, -1)));
-  ExpectEqual(Value(&x_, 3, -5), AddValue(Value(&x_, 2, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(), AddValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(&x_, 1, 23), AddValue(Value(&x_, 1, 20), Value(3)));
-  ExpectEqual(Value(&y_, 1, 5), AddValue(Value(55), Value(&y_, 1, -50)));
+  ExpectEqual(Value(-5), AddValue(Value(x_, 1, -4), Value(x_, -1, -1)));
+  ExpectEqual(Value(x_, 3, -5), AddValue(Value(x_, 2, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(), AddValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(x_, 1, 23), AddValue(Value(x_, 1, 20), Value(3)));
+  ExpectEqual(Value(y_, 1, 5), AddValue(Value(55), Value(y_, 1, -50)));
   const int32_t max_value = std::numeric_limits<int32_t>::max();
   ExpectEqual(Value(max_value), AddValue(Value(max_value - 5), Value(5)));
   ExpectEqual(Value(), AddValue(Value(max_value - 5), Value(6)));  // unsafe
@@ -410,11 +446,11 @@
 
 TEST_F(InductionVarRangeTest, SubValue) {
   ExpectEqual(Value(-90), SubValue(Value(10), Value(100)));
-  ExpectEqual(Value(-3), SubValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(&x_, 2, -3), SubValue(Value(&x_, 3, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(), SubValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(&x_, 1, 17), SubValue(Value(&x_, 1, 20), Value(3)));
-  ExpectEqual(Value(&y_, -4, 105), SubValue(Value(55), Value(&y_, 4, -50)));
+  ExpectEqual(Value(-3), SubValue(Value(x_, 1, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(x_, 2, -3), SubValue(Value(x_, 3, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(), SubValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(x_, 1, 17), SubValue(Value(x_, 1, 20), Value(3)));
+  ExpectEqual(Value(y_, -4, 105), SubValue(Value(55), Value(y_, 4, -50)));
   const int32_t min_value = std::numeric_limits<int32_t>::min();
   ExpectEqual(Value(min_value), SubValue(Value(min_value + 5), Value(5)));
   ExpectEqual(Value(), SubValue(Value(min_value + 5), Value(6)));  // unsafe
@@ -422,136 +458,140 @@
 
 TEST_F(InductionVarRangeTest, MulValue) {
   ExpectEqual(Value(1000), MulValue(Value(10), Value(100)));
-  ExpectEqual(Value(), MulValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(), MulValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(&x_, 9, 60), MulValue(Value(&x_, 3, 20), Value(3)));
-  ExpectEqual(Value(&y_, 55, -110), MulValue(Value(55), Value(&y_, 1, -2)));
+  ExpectEqual(Value(), MulValue(Value(x_, 1, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(), MulValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(x_, 9, 60), MulValue(Value(x_, 3, 20), Value(3)));
+  ExpectEqual(Value(y_, 55, -110), MulValue(Value(55), Value(y_, 1, -2)));
   ExpectEqual(Value(), MulValue(Value(90000), Value(-90000)));  // unsafe
 }
 
 TEST_F(InductionVarRangeTest, DivValue) {
   ExpectEqual(Value(25), DivValue(Value(100), Value(4)));
-  ExpectEqual(Value(), DivValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(), DivValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(), DivValue(Value(&x_, 12, 24), Value(3)));
-  ExpectEqual(Value(), DivValue(Value(55), Value(&y_, 1, -50)));
+  ExpectEqual(Value(), DivValue(Value(x_, 1, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(), DivValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(), DivValue(Value(x_, 12, 24), Value(3)));
+  ExpectEqual(Value(), DivValue(Value(55), Value(y_, 1, -50)));
   ExpectEqual(Value(), DivValue(Value(1), Value(0)));  // unsafe
 }
 
 TEST_F(InductionVarRangeTest, MinValue) {
   ExpectEqual(Value(10), MinValue(Value(10), Value(100)));
-  ExpectEqual(Value(&x_, 1, -4), MinValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(&x_, 4, -4), MinValue(Value(&x_, 4, -4), Value(&x_, 4, -1)));
-  ExpectEqual(Value(), MinValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(), MinValue(Value(&x_, 1, 20), Value(3)));
-  ExpectEqual(Value(), MinValue(Value(55), Value(&y_, 1, -50)));
+  ExpectEqual(Value(x_, 1, -4), MinValue(Value(x_, 1, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(x_, 4, -4), MinValue(Value(x_, 4, -4), Value(x_, 4, -1)));
+  ExpectEqual(Value(), MinValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(), MinValue(Value(x_, 1, 20), Value(3)));
+  ExpectEqual(Value(), MinValue(Value(55), Value(y_, 1, -50)));
 }
 
 TEST_F(InductionVarRangeTest, MaxValue) {
   ExpectEqual(Value(100), MaxValue(Value(10), Value(100)));
-  ExpectEqual(Value(&x_, 1, -1), MaxValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(&x_, 4, -1), MaxValue(Value(&x_, 4, -4), Value(&x_, 4, -1)));
-  ExpectEqual(Value(), MaxValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(), MaxValue(Value(&x_, 1, 20), Value(3)));
-  ExpectEqual(Value(), MaxValue(Value(55), Value(&y_, 1, -50)));
+  ExpectEqual(Value(x_, 1, -1), MaxValue(Value(x_, 1, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(x_, 4, -1), MaxValue(Value(x_, 4, -4), Value(x_, 4, -1)));
+  ExpectEqual(Value(), MaxValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(), MaxValue(Value(x_, 1, 20), Value(3)));
+  ExpectEqual(Value(), MaxValue(Value(55), Value(y_, 1, -50)));
 }
 
 //
-// Tests on instance methods.
+// Tests on public methods.
 //
 
 TEST_F(InductionVarRangeTest, ConstantTripCountUp) {
   BuildLoop(0, graph_->GetIntConstant(1000), 1);
   PerformInductionVarAnalysis();
-  InductionVarRange range(iva_);
 
   Value v1, v2;
   bool needs_finite_test = true;
 
   // In context of header: known.
-  range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
-  range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(999), v2);
-  range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
+  range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 }
 
 TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
   BuildLoop(1000, graph_->GetIntConstant(0), -1);
   PerformInductionVarAnalysis();
-  InductionVarRange range(iva_);
 
   Value v1, v2;
   bool needs_finite_test = true;
 
   // In context of header: known.
-  range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
-  range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(1000), v2);
-  range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
+  range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(999), v2);
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 }
 
 TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
-  HInstruction* parameter = new (&allocator_) HParameterValue(
-      graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
-  entry_block_->AddInstruction(parameter);
-  BuildLoop(0, parameter, 1);
+  BuildLoop(0, x_, 1);
   PerformInductionVarAnalysis();
-  InductionVarRange range(iva_);
 
   Value v1, v2;
   bool needs_finite_test = true;
   bool needs_taken_test = true;
 
   // In context of header: upper unknown.
-  range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(), v2);
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
-  range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
-  ExpectEqual(Value(parameter, 1, -1), v2);
-  range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  ExpectEqual(Value(x_, 1, -1), v2);
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
+  range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
-  ExpectEqual(Value(parameter, 1, 0), v2);
+  ExpectEqual(Value(x_, 1, 0), v2);
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   HInstruction* lower = nullptr;
   HInstruction* upper = nullptr;
   HInstruction* taken = nullptr;
 
   // Can generate code in context of loop-body only.
-  EXPECT_FALSE(range.CanGenerateCode(
+  EXPECT_FALSE(range_.CanGenerateCode(
       condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
-  ASSERT_TRUE(range.CanGenerateCode(
+  ASSERT_TRUE(range_.CanGenerateCode(
       increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
   EXPECT_FALSE(needs_finite_test);
   EXPECT_TRUE(needs_taken_test);
 
   // Generates code.
-  range.GenerateRangeCode(increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
+  range_.GenerateRangeCode(
+      increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
 
   // Verify lower is 0+0.
   ASSERT_TRUE(lower != nullptr);
@@ -572,7 +612,7 @@
   EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
 
   // Verify taken-test is 0<V.
-  range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
+  range_.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
   ASSERT_TRUE(taken != nullptr);
   ASSERT_TRUE(taken->IsLessThan());
   ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
@@ -581,49 +621,49 @@
 }
 
 TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
-  HInstruction* parameter = new (&allocator_) HParameterValue(
-      graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
-  entry_block_->AddInstruction(parameter);
-  BuildLoop(1000, parameter, -1);
+  BuildLoop(1000, x_, -1);
   PerformInductionVarAnalysis();
-  InductionVarRange range(iva_);
 
   Value v1, v2;
   bool needs_finite_test = true;
   bool needs_taken_test = true;
 
   // In context of header: lower unknown.
-  range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
-  range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
-  ExpectEqual(Value(parameter, 1, 1), v1);
+  ExpectEqual(Value(x_, 1, 1), v1);
   ExpectEqual(Value(1000), v2);
-  range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
+  range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
-  ExpectEqual(Value(parameter, 1, 0), v1);
+  ExpectEqual(Value(x_, 1, 0), v1);
   ExpectEqual(Value(999), v2);
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   HInstruction* lower = nullptr;
   HInstruction* upper = nullptr;
   HInstruction* taken = nullptr;
 
   // Can generate code in context of loop-body only.
-  EXPECT_FALSE(range.CanGenerateCode(
+  EXPECT_FALSE(range_.CanGenerateCode(
       condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
-  ASSERT_TRUE(range.CanGenerateCode(
+  ASSERT_TRUE(range_.CanGenerateCode(
       increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
   EXPECT_FALSE(needs_finite_test);
   EXPECT_TRUE(needs_taken_test);
 
   // Generates code.
-  range.GenerateRangeCode(increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
+  range_.GenerateRangeCode(
+      increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
 
-  // Verify lower is 1000-(-(V-1000)-1).
+  // Verify lower is 1000-((1000-V)-1).
   ASSERT_TRUE(lower != nullptr);
   ASSERT_TRUE(lower->IsSub());
   ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
@@ -633,12 +673,10 @@
   ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
   EXPECT_EQ(1, lower->InputAt(1)->AsIntConstant()->GetValue());
   lower = lower->InputAt(0);
-  ASSERT_TRUE(lower->IsNeg());
-  lower = lower->InputAt(0);
   ASSERT_TRUE(lower->IsSub());
-  EXPECT_TRUE(lower->InputAt(0)->IsParameterValue());
-  ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
-  EXPECT_EQ(1000, lower->InputAt(1)->AsIntConstant()->GetValue());
+  ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
+  EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue());
+  EXPECT_TRUE(lower->InputAt(1)->IsParameterValue());
 
   // Verify upper is 1000-0.
   ASSERT_TRUE(upper != nullptr);
@@ -649,7 +687,7 @@
   EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
 
   // Verify taken-test is 1000>V.
-  range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
+  range_.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
   ASSERT_TRUE(taken != nullptr);
   ASSERT_TRUE(taken->IsGreaterThan());
   ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
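
The updated expectation reflects the simpler expression now generated for the down-counting loop's lower bound. A quick standalone check of the algebra, assuming the loop runs i = 1000 down while i > V:

    #include <cassert>

    // 1000 - ((1000 - V) - 1) simplifies to V + 1, the smallest value the
    // loop body observes whenever the taken-test 1000 > V holds.
    int GeneratedLower(int v) { return 1000 - ((1000 - v) - 1); }

    int main() {
      for (int v = -100; v < 1000; ++v) {
        assert(GeneratedLower(v) == v + 1);
      }
      return 0;
    }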
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 0363f20..a4dcb3a 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -171,13 +171,37 @@
                                   const DexFile& dex_file,
                                   uint32_t referrer_index)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (method->GetDexFile()->GetLocation().compare(dex_file.GetLocation()) == 0) {
+  if (IsSameDexFile(*method->GetDexFile(), dex_file)) {
     return method->GetDexMethodIndex();
   } else {
     return method->FindDexMethodIndexInOtherDexFile(dex_file, referrer_index);
   }
 }
 
+static uint32_t FindClassIndexIn(mirror::Class* cls, const DexFile& dex_file)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (cls->GetDexCache() == nullptr) {
+    DCHECK(cls->IsArrayClass());
+    // TODO: find the class in `dex_file`.
+    return DexFile::kDexNoIndex;
+  } else if (cls->GetDexTypeIndex() == DexFile::kDexNoIndex16) {
+    // TODO: deal with proxy classes.
+    return DexFile::kDexNoIndex;
+  } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
+    // Update the dex cache to ensure the class is resolved in it, since the
+    // generated code will assume it is. Other dex files might also load the
+    // class, and there is no guarantee the dex cache of the class's own dex
+    // file will be updated.
+    if (cls->GetDexCache()->GetResolvedType(cls->GetDexTypeIndex()) == nullptr) {
+      cls->GetDexCache()->SetResolvedType(cls->GetDexTypeIndex(), cls);
+    }
+    return cls->GetDexTypeIndex();
+  } else {
+    // TODO: find the class in `dex_file`.
+    return DexFile::kDexNoIndex;
+  }
+}
+
 bool HInliner::TryInline(HInvoke* invoke_instruction) {
   if (invoke_instruction->IsInvokeUnresolved()) {
     return false;  // Don't bother to move further if we know the method is unresolved.
@@ -192,6 +216,10 @@
   // We can query the dex cache directly. The verifier has populated it already.
   ArtMethod* resolved_method;
   if (invoke_instruction->IsInvokeStaticOrDirect()) {
+    if (invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()) {
+      VLOG(compiler) << "Not inlining a String.<init> method";
+      return false;
+    }
     MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod();
     mirror::DexCache* const dex_cache = (&caller_dex_file == ref.dex_file)
         ? caller_compilation_unit_.GetDexCache().Get()
@@ -210,53 +238,176 @@
     return false;
   }
 
-  if (!invoke_instruction->IsInvokeStaticOrDirect()) {
-    resolved_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
-    if (resolved_method == nullptr) {
+  if (invoke_instruction->IsInvokeStaticOrDirect()) {
+    return TryInline(invoke_instruction, resolved_method);
+  }
+
+  // Check if we can statically find the method.
+  ArtMethod* actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
+  if (actual_method != nullptr) {
+    return TryInline(invoke_instruction, actual_method);
+  }
+
+  // Check if we can use an inline cache.
+  ArtMethod* caller = graph_->GetArtMethod();
+  size_t pointer_size = class_linker->GetImagePointerSize();
+  // Under JIT, we should always know the caller.
+  DCHECK(!Runtime::Current()->UseJit() || (caller != nullptr));
+  if (caller != nullptr && caller->GetProfilingInfo(pointer_size) != nullptr) {
+    ProfilingInfo* profiling_info = caller->GetProfilingInfo(pointer_size);
+    const InlineCache& ic = *profiling_info->GetInlineCache(invoke_instruction->GetDexPc());
+    if (ic.IsUnitialized()) {
       VLOG(compiler) << "Interface or virtual call to "
                      << PrettyMethod(method_index, caller_dex_file)
-                     << " could not be statically determined";
+                     << " is not hit and not inlined";
       return false;
-    }
-    // We have found a method, but we need to find where that method is for the caller's
-    // dex file.
-    method_index = FindMethodIndexIn(resolved_method, caller_dex_file, method_index);
-    if (method_index == DexFile::kDexNoIndex) {
+    } else if (ic.IsMonomorphic()) {
+      MaybeRecordStat(kMonomorphicCall);
+      return TryInlineMonomorphicCall(invoke_instruction, resolved_method, ic);
+    } else if (ic.IsPolymorphic()) {
+      MaybeRecordStat(kPolymorphicCall);
+      return TryInlinePolymorphicCall(invoke_instruction, resolved_method, ic);
+    } else {
+      DCHECK(ic.IsMegamorphic());
       VLOG(compiler) << "Interface or virtual call to "
-                     << PrettyMethod(resolved_method)
-                     << " cannot be inlined because unaccessible to caller";
+                     << PrettyMethod(method_index, caller_dex_file)
+                     << " is megamorphic and not inlined";
+      MaybeRecordStat(kMegamorphicCall);
       return false;
     }
   }
 
-  bool same_dex_file =
-      IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *resolved_method->GetDexFile());
+  VLOG(compiler) << "Interface or virtual call to "
+                 << PrettyMethod(method_index, caller_dex_file)
+                 << " could not be statically determined";
+  return false;
+}
 
-  const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
+bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
+                                        ArtMethod* resolved_method,
+                                        const InlineCache& ic) {
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  uint32_t class_index = FindClassIndexIn(ic.GetMonomorphicType(), caller_dex_file);
+  if (class_index == DexFile::kDexNoIndex) {
+    VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+                   << " from inline cache is not inlined because its class is not"
+                   << " accessible to the caller";
+    return false;
+  }
+
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  size_t pointer_size = class_linker->GetImagePointerSize();
+  if (invoke_instruction->IsInvokeInterface()) {
+    resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForInterface(
+        resolved_method, pointer_size);
+  } else {
+    DCHECK(invoke_instruction->IsInvokeVirtual());
+    resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForVirtual(
+        resolved_method, pointer_size);
+  }
+  DCHECK(resolved_method != nullptr);
+  HInstruction* receiver = invoke_instruction->InputAt(0);
+  HInstruction* cursor = invoke_instruction->GetPrevious();
+  HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
+
+  if (!TryInline(invoke_instruction, resolved_method, /* do_rtp */ false)) {
+    return false;
+  }
+
+  // We successfully inlined; now add a guard.
+  ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
+  DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
+  HInstanceFieldGet* field_get = new (graph_->GetArena()) HInstanceFieldGet(
+      receiver,
+      Primitive::kPrimNot,
+      field->GetOffset(),
+      field->IsVolatile(),
+      field->GetDexFieldIndex(),
+      field->GetDeclaringClass()->GetDexClassDefIndex(),
+      *field->GetDexFile(),
+      handles_->NewHandle(field->GetDexCache()),
+      invoke_instruction->GetDexPc());
+
+  bool is_referrer =
+      (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass());
+  HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
+                                                               class_index,
+                                                               caller_dex_file,
+                                                               is_referrer,
+                                                               invoke_instruction->GetDexPc(),
+                                                               /* needs_access_check */ false,
+                                                               /* is_in_dex_cache */ true);
+
+  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, field_get);
+  HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
+      compare, invoke_instruction->GetDexPc());
+  // TODO: Extend reference type propagation to understand the guard.
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(load_class, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(load_class, bb_cursor->GetFirstInstruction());
+  }
+  bb_cursor->InsertInstructionAfter(field_get, load_class);
+  bb_cursor->InsertInstructionAfter(compare, field_get);
+  bb_cursor->InsertInstructionAfter(deoptimize, compare);
+  deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+
+  // Run type propagation to get the guard typed, and possibly propagate the
+  // type of the receiver.
+  ReferenceTypePropagation rtp_fixup(graph_, handles_);
+  rtp_fixup.Run();
+
+  MaybeRecordStat(kInlinedMonomorphicCall);
+  return true;
+}
+
+bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction ATTRIBUTE_UNUSED,
+                                        ArtMethod* resolved_method,
+                                        const InlineCache& ic ATTRIBUTE_UNUSED) {
+  // TODO
+  VLOG(compiler) << "Unimplemented polymorphic inlining for "
+                 << PrettyMethod(resolved_method);
+  return false;
+}
+
+bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  uint32_t method_index = FindMethodIndexIn(
+      method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
+  if (method_index == DexFile::kDexNoIndex) {
+    VLOG(compiler) << "Call to "
+                   << PrettyMethod(method)
+                   << " cannot be inlined because unaccessible to caller";
+    return false;
+  }
+
+  bool same_dex_file = IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *method->GetDexFile());
+
+  const DexFile::CodeItem* code_item = method->GetCodeItem();
 
   if (code_item == nullptr) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is not inlined because it is native";
     return false;
   }
 
   size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
   if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is too big to inline";
     return false;
   }
 
   if (code_item->tries_size_ != 0) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is not inlined because of try block";
     return false;
   }
 
-  if (!resolved_method->GetDeclaringClass()->IsVerified()) {
-    uint16_t class_def_idx = resolved_method->GetDeclaringClass()->GetDexClassDefIndex();
+  if (!method->GetDeclaringClass()->IsVerified()) {
+    uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex();
     if (!compiler_driver_->IsMethodVerifiedWithoutFailures(
-          resolved_method->GetDexMethodIndex(), class_def_idx, *resolved_method->GetDexFile())) {
+          method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
       VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
                      << " couldn't be verified, so it cannot be inlined";
       return false;
@@ -273,7 +424,7 @@
     return false;
   }
 
-  if (!TryBuildAndInline(resolved_method, invoke_instruction, same_dex_file)) {
+  if (!TryBuildAndInline(method, invoke_instruction, same_dex_file, do_rtp)) {
     return false;
   }
 
@@ -284,7 +435,8 @@
 
 bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
                                  HInvoke* invoke_instruction,
-                                 bool same_dex_file) {
+                                 bool same_dex_file,
+                                 bool do_rtp) {
   ScopedObjectAccess soa(Thread::Current());
   const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
   const DexFile& callee_dex_file = *resolved_method->GetDexFile();
@@ -337,6 +489,7 @@
       invoke_type,
       graph_->IsDebuggable(),
       graph_->GetCurrentInstructionId());
+  callee_graph->SetArtMethod(resolved_method);
 
   OptimizingCompilerStats inline_stats;
   HGraphBuilder builder(callee_graph,
@@ -418,6 +571,7 @@
   size_t number_of_instructions_budget = kMaximumNumberOfHInstructions;
   if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) {
     HInliner inliner(callee_graph,
+                     outermost_graph_,
                      codegen_,
                      outer_compilation_unit_,
                      dex_compilation_unit,
@@ -529,9 +683,9 @@
   HNullConstant* null_constant = graph_->GetNullConstant();
   if (!null_constant->GetReferenceTypeInfo().IsValid()) {
     ReferenceTypeInfo::TypeHandle obj_handle =
-            handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject));
+        handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject));
     null_constant->SetReferenceTypeInfo(
-            ReferenceTypeInfo::Create(obj_handle, false /* is_exact */));
+        ReferenceTypeInfo::Create(obj_handle, false /* is_exact */));
   }
 
   // Check the integrity of reference types and run another type propagation if needed.
@@ -550,14 +704,16 @@
          return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
     }
 
-    // If the return type is a refinement of the declared type run the type propagation again.
-    ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
-    ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
-    if (invoke_rti.IsStrictSupertypeOf(return_rti)
-        || (return_rti.IsExact() && !invoke_rti.IsExact())
-        || !return_replacement->CanBeNull()) {
-      ReferenceTypePropagation rtp_fixup(graph_, handles_);
-      rtp_fixup.Run();
+    if (do_rtp) {
+      // If the return type is a refinement of the declared type run the type propagation again.
+      ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
+      ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
+      if (invoke_rti.IsStrictSupertypeOf(return_rti)
+          || (return_rti.IsExact() && !invoke_rti.IsExact())
+          || !return_replacement->CanBeNull()) {
+        ReferenceTypePropagation rtp_fixup(graph_, handles_);
+        rtp_fixup.Run();
+      }
     }
   }
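
Taken together, the new virtual/interface path reduces to a dispatch on the state of the JIT's inline cache. A minimal sketch of that decision (the enum is illustrative; the four states are the ones queried above):

    enum class CacheState { kUninitialized, kMonomorphic, kPolymorphic, kMegamorphic };

    // Mirrors the branching in HInliner::TryInline: monomorphic caches get a
    // guarded speculative inline, polymorphic ones are routed to a stub that
    // currently bails out, and cold or megamorphic sites keep their virtual
    // dispatch.
    bool ShouldTrySpeculativeInline(CacheState state) {
      switch (state) {
        case CacheState::kMonomorphic:
        case CacheState::kPolymorphic:  // TryInlinePolymorphicCall returns false for now.
          return true;
        case CacheState::kUninitialized:
        case CacheState::kMegamorphic:
          return false;
      }
      return false;  // Unreachable; keeps -Wreturn-type quiet.
    }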
 
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 0f6a945..7b9fb73 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -27,11 +27,13 @@
 class DexCompilationUnit;
 class HGraph;
 class HInvoke;
+class InlineCache;
 class OptimizingCompilerStats;
 
 class HInliner : public HOptimization {
  public:
   HInliner(HGraph* outer_graph,
+           HGraph* outermost_graph,
            CodeGenerator* codegen,
            const DexCompilationUnit& outer_compilation_unit,
            const DexCompilationUnit& caller_compilation_unit,
@@ -40,6 +42,7 @@
            OptimizingCompilerStats* stats,
            size_t depth = 0)
       : HOptimization(outer_graph, kInlinerPassName, stats),
+        outermost_graph_(outermost_graph),
         outer_compilation_unit_(outer_compilation_unit),
         caller_compilation_unit_(caller_compilation_unit),
         codegen_(codegen),
@@ -54,10 +57,33 @@
 
  private:
   bool TryInline(HInvoke* invoke_instruction);
+
+  // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` controls
+  // whether reference type propagation may run after the inlining.
+  bool TryInline(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp = true)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Try to inline the target of a monomorphic call. If successful, the code
+  // in the graph will look like:
+  // if (receiver.getClass() != ic.GetMonomorphicType()) deopt
+  // ... // inlined code
+  bool TryInlineMonomorphicCall(HInvoke* invoke_instruction,
+                                ArtMethod* resolved_method,
+                                const InlineCache& ic)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Try to inline targets of a polymorphic call. Currently unimplemented.
+  bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
+                                ArtMethod* resolved_method,
+                                const InlineCache& ic)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
   bool TryBuildAndInline(ArtMethod* resolved_method,
                          HInvoke* invoke_instruction,
-                         bool same_dex_file);
+                         bool same_dex_file,
+                         bool do_rtp = true);
 
+  HGraph* const outermost_graph_;
   const DexCompilationUnit& outer_compilation_unit_;
   const DexCompilationUnit& caller_compilation_unit_;
   CodeGenerator* const codegen_;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index b97dc1a..c504ded 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -39,6 +39,12 @@
     }
   }
 
+  bool ReplaceRotateWithRor(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+  bool TryReplaceWithRotate(HBinaryOperation* instruction);
+  bool TryReplaceWithRotateConstantPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+  bool TryReplaceWithRotateRegisterNegPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+  bool TryReplaceWithRotateRegisterSubPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+
   bool TryMoveNegOnInputsAfterBinop(HBinaryOperation* binop);
   void VisitShift(HBinaryOperation* shift);
 
@@ -77,6 +83,7 @@
 
   bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const;
 
+  void SimplifyRotate(HInvoke* invoke, bool is_left);
   void SimplifySystemArrayCopy(HInvoke* invoke);
   void SimplifyStringEquals(HInvoke* invoke);
 
@@ -169,20 +176,165 @@
       //    src
       instruction->ReplaceWith(input_other);
       instruction->GetBlock()->RemoveInstruction(instruction);
-    } else if (instruction->IsShl() && input_cst->IsOne()) {
-      // Replace Shl looking like
-      //    SHL dst, src, 1
-      // with
-      //    ADD dst, src, src
-      HAdd *add = new(GetGraph()->GetArena()) HAdd(instruction->GetType(),
-                                                   input_other,
-                                                   input_other);
-      instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add);
-      RecordSimplification();
     }
   }
 }
 
+static bool IsSubRegBitsMinusOther(HSub* sub, size_t reg_bits, HInstruction* other) {
+  return (sub->GetRight() == other &&
+          sub->GetLeft()->IsConstant() &&
+          (Int64FromConstant(sub->GetLeft()->AsConstant()) & (reg_bits - 1)) == 0);
+}
+
+bool InstructionSimplifierVisitor::ReplaceRotateWithRor(HBinaryOperation* op,
+                                                        HUShr* ushr,
+                                                        HShl* shl) {
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  HRor* ror = new (GetGraph()->GetArena()) HRor(ushr->GetType(),
+                                                ushr->GetLeft(),
+                                                ushr->GetRight());
+  op->GetBlock()->ReplaceAndRemoveInstructionWith(op, ror);
+  if (!ushr->HasUses()) {
+    ushr->GetBlock()->RemoveInstruction(ushr);
+  }
+  if (!ushr->GetRight()->HasUses()) {
+    ushr->GetRight()->GetBlock()->RemoveInstruction(ushr->GetRight());
+  }
+  if (!shl->HasUses()) {
+    shl->GetBlock()->RemoveInstruction(shl);
+  }
+  if (!shl->GetRight()->HasUses()) {
+    shl->GetRight()->GetBlock()->RemoveInstruction(shl->GetRight());
+  }
+  return true;
+}
+
+// Try to replace a binary operation flanked by one UShr and one Shl with a bitfield rotation.
+bool InstructionSimplifierVisitor::TryReplaceWithRotate(HBinaryOperation* op) {
+  // This simplification is currently supported on x86, x86_64, ARM and ARM64.
+  // TODO: Implement it for MIPS/64.
+  const InstructionSet instruction_set = GetGraph()->GetInstructionSet();
+  switch (instruction_set) {
+    case kArm:
+    case kArm64:
+    case kThumb2:
+    case kX86:
+    case kX86_64:
+      break;
+    default:
+      return false;
+  }
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  HInstruction* left = op->GetLeft();
+  HInstruction* right = op->GetRight();
+  // If we have an UShr and a Shl (in either order).
+  if ((left->IsUShr() && right->IsShl()) || (left->IsShl() && right->IsUShr())) {
+    HUShr* ushr = left->IsUShr() ? left->AsUShr() : right->AsUShr();
+    HShl* shl = left->IsShl() ? left->AsShl() : right->AsShl();
+    DCHECK(Primitive::IsIntOrLongType(ushr->GetType()));
+    if (ushr->GetType() == shl->GetType() &&
+        ushr->GetLeft() == shl->GetLeft()) {
+      if (ushr->GetRight()->IsConstant() && shl->GetRight()->IsConstant()) {
+        // Shift distances are both constant, try replacing with Ror if they
+        // add up to the register size.
+        return TryReplaceWithRotateConstantPattern(op, ushr, shl);
+      } else if (ushr->GetRight()->IsSub() || shl->GetRight()->IsSub()) {
+        // Shift distances are potentially of the form x and (reg_size - x).
+        return TryReplaceWithRotateRegisterSubPattern(op, ushr, shl);
+      } else if (ushr->GetRight()->IsNeg() || shl->GetRight()->IsNeg()) {
+        // Shift distances are potentially of the form d and -d.
+        return TryReplaceWithRotateRegisterNegPattern(op, ushr, shl);
+      }
+    }
+  }
+  return false;
+}
+
+// Try replacing code looking like (x >>> #rdist OP x << #ldist):
+//    UShr dst, x,   #rdist
+//    Shl  tmp, x,   #ldist
+//    OP   dst, dst, tmp
+// or like (x >>> #rdist OP x << #-ldist):
+//    UShr dst, x,   #rdist
+//    Shl  tmp, x,   #-ldist
+//    OP   dst, dst, tmp
+// with
+//    Ror  dst, x,   #rdist
+bool InstructionSimplifierVisitor::TryReplaceWithRotateConstantPattern(HBinaryOperation* op,
+                                                                       HUShr* ushr,
+                                                                       HShl* shl) {
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  size_t reg_bits = Primitive::ComponentSize(ushr->GetType()) * kBitsPerByte;
+  size_t rdist = Int64FromConstant(ushr->GetRight()->AsConstant());
+  size_t ldist = Int64FromConstant(shl->GetRight()->AsConstant());
+  if (((ldist + rdist) & (reg_bits - 1)) == 0) {
+    ReplaceRotateWithRor(op, ushr, shl);
+    return true;
+  }
+  return false;
+}
+
+// Replace code looking like (x >>> -d OP x << d):
+//    Neg  neg, d
+//    UShr dst, x,   neg
+//    Shl  tmp, x,   d
+//    OP   dst, dst, tmp
+// with
+//    Neg  neg, d
+//    Ror  dst, x,   neg
+// *** OR ***
+// Replace code looking like (x >>> d OP x << -d):
+//    UShr dst, x,   d
+//    Neg  neg, d
+//    Shl  tmp, x,   neg
+//    OP   dst, dst, tmp
+// with
+//    Ror  dst, x,   d
+bool InstructionSimplifierVisitor::TryReplaceWithRotateRegisterNegPattern(HBinaryOperation* op,
+                                                                          HUShr* ushr,
+                                                                          HShl* shl) {
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  DCHECK(ushr->GetRight()->IsNeg() || shl->GetRight()->IsNeg());
+  bool neg_is_left = shl->GetRight()->IsNeg();
+  HNeg* neg = neg_is_left ? shl->GetRight()->AsNeg() : ushr->GetRight()->AsNeg();
+  // Check that the shift distance being negated is the distance used for the
+  // other shift, i.e. the pattern really is (x >>> d) OP (x << -d) or its mirror.
+  if (neg->InputAt(0) == (neg_is_left ? ushr->GetRight() : shl->GetRight())) {
+    return ReplaceRotateWithRor(op, ushr, shl);
+  }
+  return false;
+}
+
+// Try replacing code looking like (x >>> d OP x << (#bits - d)):
+//    UShr dst, x,     d
+//    Sub  ld,  #bits, d
+//    Shl  tmp, x,     ld
+//    OP   dst, dst,   tmp
+// with
+//    Ror  dst, x,     d
+// *** OR ***
+// Replace code looking like (x >>> (#bits - d) OP x << d):
+//    Sub  rd,  #bits, d
+//    UShr dst, x,     rd
+//    Shl  tmp, x,     d
+//    OP   dst, dst,   tmp
+// with
+//    Neg  neg, d
+//    Ror  dst, x,     neg
+bool InstructionSimplifierVisitor::TryReplaceWithRotateRegisterSubPattern(HBinaryOperation* op,
+                                                                          HUShr* ushr,
+                                                                          HShl* shl) {
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  DCHECK(ushr->GetRight()->IsSub() || shl->GetRight()->IsSub());
+  size_t reg_bits = Primitive::ComponentSize(ushr->GetType()) * kBitsPerByte;
+  HInstruction* shl_shift = shl->GetRight();
+  HInstruction* ushr_shift = ushr->GetRight();
+  if ((shl_shift->IsSub() && IsSubRegBitsMinusOther(shl_shift->AsSub(), reg_bits, ushr_shift)) ||
+      (ushr_shift->IsSub() && IsSubRegBitsMinusOther(ushr_shift->AsSub(), reg_bits, shl_shift))) {
+    return ReplaceRotateWithRor(op, ushr, shl);
+  }
+  return false;
+}
+
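All three pattern matchers funnel into ReplaceRotateWithRor, so they are only sound if the shift pair really computes a rotation. A small standalone check of the underlying identities, assuming 32-bit unsigned values:

    #include <cassert>
    #include <cstdint>

    // Reference rotate-right, written the way the matched HIR computes it.
    uint32_t Ror(uint32_t x, uint32_t d) {
      d &= 31;  // shift distances are taken mod the register size
      return d == 0 ? x : (x >> d) | (x << (32 - d));
    }

    int main() {
      const uint32_t x = 0x80000001u;
      for (uint32_t d = 1; d < 32; ++d) {
        // Constant pattern: distances summing to the register size.
        assert(((x >> d) | (x << (32 - d))) == Ror(x, d));
        // Sub pattern: (x >>> (32 - d)) OP (x << d) rotates left by d,
        // i.e. right by 32 - d.
        assert(((x >> (32 - d)) | (x << d)) == Ror(x, 32 - d));
      }
      return 0;
    }
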
 void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) {
   HInstruction* obj = null_check->InputAt(0);
   if (!obj->CanBeNull()) {
@@ -372,9 +524,8 @@
         block->RemoveInstruction(equal);
         RecordSimplification();
       } else if (input_const->AsIntConstant()->IsZero()) {
-        // Replace (bool_value == false) with !bool_value
-        block->ReplaceAndRemoveInstructionWith(
-            equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value));
+        equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, equal));
+        block->RemoveInstruction(equal);
         RecordSimplification();
       } else {
         // Replace (bool_value == integer_not_zero_nor_one_constant) with false
@@ -399,9 +550,8 @@
       // We are comparing the boolean to a constant which is of type int and can
       // be any constant.
       if (input_const->AsIntConstant()->IsOne()) {
-        // Replace (bool_value != true) with !bool_value
-        block->ReplaceAndRemoveInstructionWith(
-            not_equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value));
+        not_equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, not_equal));
+        block->RemoveInstruction(not_equal);
         RecordSimplification();
       } else if (input_const->AsIntConstant()->IsZero()) {
         // Replace (bool_value != false) with bool_value
@@ -542,7 +692,10 @@
     instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub);
     RecordSimplification();
     neg->GetBlock()->RemoveInstruction(neg);
+    return;
   }
+
+  TryReplaceWithRotate(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) {
@@ -624,13 +777,6 @@
 void InstructionSimplifierVisitor::VisitCondition(HCondition* condition) {
   // Try to fold an HCompare into this HCondition.
 
-  // This simplification is currently supported on x86, x86_64, ARM and ARM64.
-  // TODO: Implement it for MIPS and MIPS64.
-  InstructionSet instruction_set = GetGraph()->GetInstructionSet();
-  if (instruction_set == kMips || instruction_set == kMips64) {
-    return;
-  }
-
   HInstruction* left = condition->GetLeft();
   HInstruction* right = condition->GetRight();
   // We can only replace an HCondition which compares a Compare to 0.
@@ -796,6 +942,34 @@
       HShl* shl = new(allocator) HShl(type, input_other, shift);
       block->ReplaceAndRemoveInstructionWith(instruction, shl);
       RecordSimplification();
+    } else if (IsPowerOfTwo(factor - 1)) {
+      // Transform code looking like
+      //    MUL dst, src, (2^n + 1)
+      // into
+      //    SHL tmp, src, n
+      //    ADD dst, src, tmp
+      HShl* shl = new (allocator) HShl(type,
+                                       input_other,
+                                       GetGraph()->GetIntConstant(WhichPowerOf2(factor - 1)));
+      HAdd* add = new (allocator) HAdd(type, input_other, shl);
+
+      block->InsertInstructionBefore(shl, instruction);
+      block->ReplaceAndRemoveInstructionWith(instruction, add);
+      RecordSimplification();
+    } else if (IsPowerOfTwo(factor + 1)) {
+      // Transform code looking like
+      //    MUL dst, src, (2^n - 1)
+      // into
+      //    SHL tmp, src, n
+      //    SUB dst, tmp, src
+      HShl* shl = new (allocator) HShl(type,
+                                       input_other,
+                                       GetGraph()->GetIntConstant(WhichPowerOf2(factor + 1)));
+      HSub* sub = new (allocator) HSub(type, shl, input_other);
+
+      block->InsertInstructionBefore(shl, instruction);
+      block->ReplaceAndRemoveInstructionWith(instruction, sub);
+      RecordSimplification();
     }
   }
 }
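
The two new MUL rewrites rest on the identities x * (2^n + 1) == (x << n) + x and x * (2^n - 1) == (x << n) - x. A quick standalone check with n = 3 (factors 9 and 7):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t x = 0; x <= 1000; ++x) {
        assert(x * 9 == (x << 3) + x);  // factor - 1 is a power of two
        assert(x * 7 == (x << 3) - x);  // factor + 1 is a power of two
      }
      return 0;
    }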
@@ -890,7 +1064,10 @@
     //    src
     instruction->ReplaceWith(instruction->GetLeft());
     instruction->GetBlock()->RemoveInstruction(instruction);
+    return;
   }
+
+  TryReplaceWithRotate(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitShl(HShl* instruction) {
@@ -1011,6 +1188,8 @@
     RecordSimplification();
     return;
   }
+
+  TryReplaceWithRotate(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitFakeString(HFakeString* instruction) {
@@ -1079,6 +1258,42 @@
   }
 }
 
+void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke, bool is_left) {
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  DCHECK_EQ(invoke->GetOriginalInvokeType(), InvokeType::kStatic);
+  // This simplification is currently supported on x86, x86_64, ARM and ARM64.
+  // TODO: Implement it for MIPS/64.
+  const InstructionSet instruction_set = GetGraph()->GetInstructionSet();
+  switch (instruction_set) {
+    case kArm:
+    case kArm64:
+    case kThumb2:
+    case kX86:
+    case kX86_64:
+      break;
+    default:
+      return;
+  }
+  HInstruction* value = invoke->InputAt(0);
+  HInstruction* distance = invoke->InputAt(1);
+  // Replace the invoke with an HRor.
+  if (is_left) {
+    distance = new (GetGraph()->GetArena()) HNeg(distance->GetType(), distance);
+    invoke->GetBlock()->InsertInstructionBefore(distance, invoke);
+  }
+  HRor* ror = new (GetGraph()->GetArena()) HRor(value->GetType(), value, distance);
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, ror);
+  // Remove ClinitCheck and LoadClass, if possible.
+  HInstruction* clinit = invoke->InputAt(invoke->InputCount() - 1);
+  if (clinit->IsClinitCheck() && !clinit->HasUses()) {
+    clinit->GetBlock()->RemoveInstruction(clinit);
+    HInstruction* ldclass = clinit->InputAt(0);
+    if (ldclass->IsLoadClass() && !ldclass->HasUses()) {
+      ldclass->GetBlock()->RemoveInstruction(ldclass);
+    }
+  }
+}
+
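SimplifyRotate maps both rotate intrinsics onto the single HRor node, negating the distance for the left-rotate variant. A standalone check of the identity it relies on, assuming 32-bit values and distances taken mod 32:

    #include <cassert>
    #include <cstdint>

    uint32_t Ror(uint32_t x, int32_t d) {
      uint32_t s = static_cast<uint32_t>(d) & 31;
      return s == 0 ? x : (x >> s) | (x << (32 - s));
    }

    uint32_t Rol(uint32_t x, int32_t d) {
      uint32_t s = static_cast<uint32_t>(d) & 31;
      return s == 0 ? x : (x << s) | (x >> (32 - s));
    }

    int main() {
      for (int32_t d = -40; d <= 40; ++d) {
        // rotateLeft(x, d) == rotateRight(x, -d): exactly the HNeg inserted
        // before the HRor above.
        assert(Rol(0xDEADBEEFu, d) == Ror(0xDEADBEEFu, -d));
      }
      return 0;
    }
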
 static bool IsArrayLengthOf(HInstruction* potential_length, HInstruction* potential_array) {
   if (potential_length->IsArrayLength()) {
     return potential_length->InputAt(0) == potential_array;
@@ -1149,6 +1364,12 @@
     SimplifyStringEquals(instruction);
   } else if (instruction->GetIntrinsic() == Intrinsics::kSystemArrayCopy) {
     SimplifySystemArrayCopy(instruction);
+  } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerRotateRight ||
+             instruction->GetIntrinsic() == Intrinsics::kLongRotateRight) {
+    SimplifyRotate(instruction, false);
+  } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerRotateLeft ||
+             instruction->GetIntrinsic() == Intrinsics::kLongRotateLeft) {
+    SimplifyRotate(instruction, true);
   }
 }
 
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index eb79f46..6a34b13 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -16,11 +16,16 @@
 
 #include "instruction_simplifier_arm64.h"
 
+#include "common_arm64.h"
 #include "mirror/array-inl.h"
 
 namespace art {
 namespace arm64 {
 
+using helpers::CanFitInShifterOperand;
+using helpers::HasShifterOperand;
+using helpers::ShifterOperandSupportsExtension;
+
 void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access,
                                                                      HInstruction* array,
                                                                      HInstruction* index,
@@ -62,6 +67,169 @@
   RecordSimplification();
 }
 
+bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use,
+                                                                   HInstruction* bitfield_op,
+                                                                   bool do_merge) {
+  DCHECK(HasShifterOperand(use));
+  DCHECK(use->IsBinaryOperation() || use->IsNeg());
+  DCHECK(CanFitInShifterOperand(bitfield_op));
+  DCHECK(!bitfield_op->HasEnvironmentUses());
+
+  Primitive::Type type = use->GetType();
+  if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) {
+    return false;
+  }
+
+  HInstruction* left;
+  HInstruction* right;
+  if (use->IsBinaryOperation()) {
+    left = use->InputAt(0);
+    right = use->InputAt(1);
+  } else {
+    DCHECK(use->IsNeg());
+    right = use->AsNeg()->InputAt(0);
+    left = GetGraph()->GetConstant(right->GetType(), 0);
+  }
+  DCHECK(left == bitfield_op || right == bitfield_op);
+
+  if (left == right) {
+    // TODO: Handle special transformations in this situation?
+    // For example should we transform `(x << 1) + (x << 1)` into `(x << 2)`?
+    // Or should this be part of a separate transformation logic?
+    return false;
+  }
+
+  bool is_commutative = use->IsBinaryOperation() && use->AsBinaryOperation()->IsCommutative();
+  HInstruction* other_input;
+  if (bitfield_op == right) {
+    other_input = left;
+  } else {
+    if (is_commutative) {
+      other_input = right;
+    } else {
+      return false;
+    }
+  }
+
+  HArm64DataProcWithShifterOp::OpKind op_kind;
+  int shift_amount = 0;
+  HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount);
+
+  if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind) &&
+      !ShifterOperandSupportsExtension(use)) {
+    return false;
+  }
+
+  if (do_merge) {
+    HArm64DataProcWithShifterOp* alu_with_op =
+        new (GetGraph()->GetArena()) HArm64DataProcWithShifterOp(use,
+                                                                 other_input,
+                                                                 bitfield_op->InputAt(0),
+                                                                 op_kind,
+                                                                 shift_amount,
+                                                                 use->GetDexPc());
+    use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op);
+    if (bitfield_op->GetUses().IsEmpty()) {
+      bitfield_op->GetBlock()->RemoveInstruction(bitfield_op);
+    }
+    RecordSimplification();
+  }
+
+  return true;
+}
+
+// Merge a bitfield move instruction into its uses if it can be merged in all of them.
+bool InstructionSimplifierArm64Visitor::TryMergeIntoUsersShifterOperand(HInstruction* bitfield_op) {
+  DCHECK(CanFitInShifterOperand(bitfield_op));
+
+  if (bitfield_op->HasEnvironmentUses()) {
+    return false;
+  }
+
+  const HUseList<HInstruction*>& uses = bitfield_op->GetUses();
+
+  // Check whether we can merge the instruction into all its users' shifter operands.
+  for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) {
+    HInstruction* use = it_use.Current()->GetUser();
+    if (!HasShifterOperand(use)) {
+      return false;
+    }
+    if (!CanMergeIntoShifterOperand(use, bitfield_op)) {
+      return false;
+    }
+  }
+
+  // Merge the instruction into its uses.
+  for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) {
+    HInstruction* use = it_use.Current()->GetUser();
+    bool merged = MergeIntoShifterOperand(use, bitfield_op);
+    DCHECK(merged);
+  }
+
+  return true;
+}
+
+bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns(
+    HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) {
+  DCHECK(Primitive::IsIntOrLongType(mul->GetType()));
+  DCHECK(input_binop->IsAdd() || input_binop->IsSub());
+  DCHECK_NE(input_binop, input_other);
+  if (!input_binop->HasOnlyOneNonEnvironmentUse()) {
+    return false;
+  }
+
+  // Try to interpret patterns like
+  //    a * (b <+/-> 1)
+  // as
+  //    (a * b) <+/-> a
+  HInstruction* input_a = input_other;
+  HInstruction* input_b = nullptr;  // Set to a non-null value if we found a pattern to optimize.
+  HInstruction::InstructionKind op_kind;
+
+  if (input_binop->IsAdd()) {
+    if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) {
+      // Interpret
+      //    a * (b + 1)
+      // as
+      //    (a * b) + a
+      input_b = input_binop->GetLeastConstantLeft();
+      op_kind = HInstruction::kAdd;
+    }
+  } else {
+    DCHECK(input_binop->IsSub());
+    if (input_binop->GetRight()->IsConstant() &&
+        input_binop->GetRight()->AsConstant()->IsMinusOne()) {
+      // Interpret
+      //    a * (b - (-1))
+      // as
+      //    a + (a * b)
+      input_b = input_binop->GetLeft();
+      op_kind = HInstruction::kAdd;
+    } else if (input_binop->GetLeft()->IsConstant() &&
+               input_binop->GetLeft()->AsConstant()->IsOne()) {
+      // Interpret
+      //    a * (1 - b)
+      // as
+      //    a - (a * b)
+      input_b = input_binop->GetRight();
+      op_kind = HInstruction::kSub;
+    }
+  }
+
+  if (input_b == nullptr) {
+    // We did not find a pattern we can optimize.
+    return false;
+  }
+
+  HArm64MultiplyAccumulate* mulacc = new(GetGraph()->GetArena()) HArm64MultiplyAccumulate(
+      mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc());
+
+  mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc);
+  input_binop->GetBlock()->RemoveInstruction(input_binop);
+
+  return true;
+}
+
 void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
   TryExtractArrayAccessAddress(instruction,
                                instruction->GetArray(),
@@ -76,5 +244,110 @@
                                Primitive::ComponentSize(instruction->GetComponentType()));
 }
 
+void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) {
+  Primitive::Type type = instruction->GetType();
+  if (!Primitive::IsIntOrLongType(type)) {
+    return;
+  }
+
+  HInstruction* use = instruction->HasNonEnvironmentUses()
+      ? instruction->GetUses().GetFirst()->GetUser()
+      : nullptr;
+
+  if (instruction->HasOnlyOneNonEnvironmentUse() && (use->IsAdd() || use->IsSub())) {
+    // Replace code looking like
+    //    MUL tmp, x, y
+    //    SUB dst, acc, tmp
+    // with
+    //    MULSUB dst, acc, x, y
+    // Note that we do not want to (unconditionally) perform the merge when the
+    // multiplication has multiple uses and it can be merged in all of them.
+    // Multiple uses could happen on the same control-flow path, and we would
+    // then increase the amount of work. In the future we could try to evaluate
+    // whether all uses are on different control-flow paths (using dominance and
+    // reverse-dominance information) and only perform the merge when they are.
+    HInstruction* accumulator = nullptr;
+    HBinaryOperation* binop = use->AsBinaryOperation();
+    HInstruction* binop_left = binop->GetLeft();
+    HInstruction* binop_right = binop->GetRight();
+    // The `HMul` has a single non-environment use, so even after GVN its user
+    // cannot have it as both inputs.
+    DCHECK_NE(binop_left, binop_right);
+    if (binop_right == instruction) {
+      accumulator = binop_left;
+    } else if (use->IsAdd()) {
+      DCHECK_EQ(binop_left, instruction);
+      accumulator = binop_right;
+    }
+
+    if (accumulator != nullptr) {
+      HArm64MultiplyAccumulate* mulacc =
+          new (GetGraph()->GetArena()) HArm64MultiplyAccumulate(type,
+                                                                binop->GetKind(),
+                                                                accumulator,
+                                                                instruction->GetLeft(),
+                                                                instruction->GetRight());
+
+      binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
+      DCHECK(!instruction->HasUses());
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      RecordSimplification();
+      return;
+    }
+  }
+
+  // Use a multiply-accumulate instruction for a few simple patterns.
+  // We prefer not to apply the following transformations if the left and
+  // right inputs perform the same operation.
+  // We rely on GVN having squashed the inputs when appropriate; however, the
+  // results are still correct even if that did not happen.
+  if (instruction->GetLeft() == instruction->GetRight()) {
+    return;
+  }
+
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  if ((right->IsAdd() || right->IsSub()) &&
+      TrySimpleMultiplyAccumulatePatterns(instruction, right->AsBinaryOperation(), left)) {
+    return;
+  }
+  if ((left->IsAdd() || left->IsSub()) &&
+      TrySimpleMultiplyAccumulatePatterns(instruction, left->AsBinaryOperation(), right)) {
+    return;
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitShl(HShl* instruction) {
+  if (instruction->InputAt(1)->IsConstant()) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitShr(HShr* instruction) {
+  if (instruction->InputAt(1)->IsConstant()) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitTypeConversion(HTypeConversion* instruction) {
+  Primitive::Type result_type = instruction->GetResultType();
+  Primitive::Type input_type = instruction->GetInputType();
+
+  if (input_type == result_type) {
+    // We let the arch-independent code handle this.
+    return;
+  }
+
+  if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitUShr(HUShr* instruction) {
+  if (instruction->InputAt(1)->IsConstant()) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
 }  // namespace arm64
 }  // namespace art
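
Note: the VisitMul hook added above merges an HMul that has a single non-environment
use into the HAdd/HSub consuming it, producing one HArm64MultiplyAccumulate node. A
minimal sketch of the source-level pattern being targeted (illustrative C++, not ART
code; the function name is hypothetical):

    #include <cstdint>
    int32_t MulSubExample(int32_t acc, int32_t x, int32_t y) {
      // HMul feeding an HSub as its only non-environment use; after the
      // merge, an arm64 backend can emit the whole expression as a single
      // instruction: msub w0, w1, w2, w0   (w0 = w0 - w1 * w2)
      return acc - x * y;
    }

With the merge applied, the backend emits one msub (or madd for the addition case)
instead of a mul followed by a sub.
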
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 4b697db..b7f490b 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -39,9 +39,30 @@
                                     HInstruction* array,
                                     HInstruction* index,
                                     int access_size);
+  bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
+  bool TryMergeIntoShifterOperand(HInstruction* use,
+                                  HInstruction* bitfield_op,
+                                  bool do_merge);
+  bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+    return TryMergeIntoShifterOperand(use, bitfield_op, false);
+  }
+  bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+    DCHECK(CanMergeIntoShifterOperand(use, bitfield_op));
+    return TryMergeIntoShifterOperand(use, bitfield_op, true);
+  }
 
+  bool TrySimpleMultiplyAccumulatePatterns(HMul* mul,
+                                           HBinaryOperation* input_binop,
+                                           HInstruction* input_other);
+
+  // HInstruction visitors, sorted alphabetically.
   void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
   void VisitArraySet(HArraySet* instruction) OVERRIDE;
+  void VisitMul(HMul* instruction) OVERRIDE;
+  void VisitShl(HShl* instruction) OVERRIDE;
+  void VisitShr(HShr* instruction) OVERRIDE;
+  void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
+  void VisitUShr(HUShr* instruction) OVERRIDE;
 
   OptimizingCompilerStats* stats_;
 };
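
Note: the TryMergeIntoUsersShifterOperand/TryMergeIntoShifterOperand helpers declared
here fold a constant-distance shift (see VisitShl/VisitShr/VisitUShr) or an
integral-to-integral HTypeConversion (i.e. a sign/zero extension) into the shifter or
extend operand of the arm64 instruction that consumes it. A sketch of the kind of
pattern this targets (illustrative C++, not ART code; the function name is
hypothetical):

    #include <cstdint>
    int64_t ShiftMergeExample(int64_t a, int64_t b) {
      // The constant-distance shift can fold into the add's shifter
      // operand, yielding a single: add x0, x0, x1, lsl #3
      return a + (b << 3);
    }
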
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index b01324e..7127215 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -189,6 +189,42 @@
       return ((method.d.data & kIntrinsicFlagMin) == 0) ?
           Intrinsics::kMathMaxLongLong : Intrinsics::kMathMinLongLong;
 
+    // More math builtins.
+    case kIntrinsicCos:
+      return Intrinsics::kMathCos;
+    case kIntrinsicSin:
+      return Intrinsics::kMathSin;
+    case kIntrinsicAcos:
+      return Intrinsics::kMathAcos;
+    case kIntrinsicAsin:
+      return Intrinsics::kMathAsin;
+    case kIntrinsicAtan:
+      return Intrinsics::kMathAtan;
+    case kIntrinsicAtan2:
+      return Intrinsics::kMathAtan2;
+    case kIntrinsicCbrt:
+      return Intrinsics::kMathCbrt;
+    case kIntrinsicCosh:
+      return Intrinsics::kMathCosh;
+    case kIntrinsicExp:
+      return Intrinsics::kMathExp;
+    case kIntrinsicExpm1:
+      return Intrinsics::kMathExpm1;
+    case kIntrinsicHypot:
+      return Intrinsics::kMathHypot;
+    case kIntrinsicLog:
+      return Intrinsics::kMathLog;
+    case kIntrinsicLog10:
+      return Intrinsics::kMathLog10;
+    case kIntrinsicNextAfter:
+      return Intrinsics::kMathNextAfter;
+    case kIntrinsicSinh:
+      return Intrinsics::kMathSinh;
+    case kIntrinsicTan:
+      return Intrinsics::kMathTan;
+    case kIntrinsicTanh:
+      return Intrinsics::kMathTanh;
+
     // Misc math.
     case kIntrinsicSqrt:
       return Intrinsics::kMathSqrt;
@@ -384,7 +420,7 @@
   // InvokeStaticOrDirect.
   InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic);
   InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ?
-      invoke->AsInvokeStaticOrDirect()->GetInvokeType() :
+      invoke->AsInvokeStaticOrDirect()->GetOptimizedInvokeType() :
       invoke->IsInvokeVirtual() ? kVirtual : kSuper;
   switch (intrinsic_type) {
     case kStatic:
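
Note: the seventeen new cases wire the dex-level kIntrinsic* flags for the
java.lang.Math transcendental methods through to the corresponding
Intrinsics::kMath* values. The ARM and ARM64 backends below still stub all of
them out as UNIMPLEMENTED_INTRINSIC, so recognized calls keep using the normal
invoke path for now. As a rough illustration (hypothetical helper names, not
ART code), the recognized Java methods broadly correspond to the C library
functions of the same name:

    #include <cmath>
    // Illustration only: libm functions with the same semantics as the
    // Java methods being recognized above.
    double math_expm1(double x)                { return std::expm1(x); }        // Math.expm1
    double math_hypot(double x, double y)      { return std::hypot(x, y); }     // Math.hypot
    double math_next_after(double a, double b) { return std::nextafter(a, b); } // Math.nextAfter
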
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index d2017da..4683aee 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -113,10 +113,10 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 void IntrinsicCodeGeneratorARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
@@ -127,10 +127,10 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 void IntrinsicCodeGeneratorARM::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -240,178 +240,6 @@
   GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
 }
 
-static void GenIntegerRotate(LocationSummary* locations,
-                             ArmAssembler* assembler,
-                             bool is_left) {
-  Register in = locations->InAt(0).AsRegister<Register>();
-  Location rhs = locations->InAt(1);
-  Register out = locations->Out().AsRegister<Register>();
-
-  if (rhs.IsConstant()) {
-    // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
-    // so map all rotations to a +ve. equivalent in that range.
-    // (e.g. left *or* right by -2 bits == 30 bits in the same direction.)
-    uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue() & 0x1F;
-    if (rot) {
-      // Rotate, mapping left rotations to right equivalents if necessary.
-      // (e.g. left by 2 bits == right by 30.)
-      __ Ror(out, in, is_left ? (0x20 - rot) : rot);
-    } else if (out != in) {
-      __ Mov(out, in);
-    }
-  } else {
-    if (is_left) {
-      __ rsb(out, rhs.AsRegister<Register>(), ShifterOperand(0));
-      __ Ror(out, in, out);
-    } else {
-      __ Ror(out, in, rhs.AsRegister<Register>());
-    }
-  }
-}
-
-// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
-// rotates by swapping input regs (effectively rotating by the first 32-bits of
-// a larger rotation) or flipping direction (thus treating larger right/left
-// rotations as sub-word sized rotations in the other direction) as appropriate.
-static void GenLongRotate(LocationSummary* locations,
-                          ArmAssembler* assembler,
-                          bool is_left) {
-  Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
-  Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
-  Location rhs = locations->InAt(1);
-  Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
-  Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
-
-  if (rhs.IsConstant()) {
-    uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue();
-    // Map all left rotations to right equivalents.
-    if (is_left) {
-      rot = 0x40 - rot;
-    }
-    // Map all rotations to +ve. equivalents on the interval [0,63].
-    rot &= 0x3F;
-    // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate
-    // logic below to a simple pair of binary orr.
-    // (e.g. 34 bits == in_reg swap + 2 bits right.)
-    if (rot >= 0x20) {
-      rot -= 0x20;
-      std::swap(in_reg_hi, in_reg_lo);
-    }
-    // Rotate, or mov to out for zero or word size rotations.
-    if (rot) {
-      __ Lsr(out_reg_hi, in_reg_hi, rot);
-      __ orr(out_reg_hi, out_reg_hi, ShifterOperand(in_reg_lo, arm::LSL, 0x20 - rot));
-      __ Lsr(out_reg_lo, in_reg_lo, rot);
-      __ orr(out_reg_lo, out_reg_lo, ShifterOperand(in_reg_hi, arm::LSL, 0x20 - rot));
-    } else {
-      __ Mov(out_reg_lo, in_reg_lo);
-      __ Mov(out_reg_hi, in_reg_hi);
-    }
-  } else {
-    Register shift_left = locations->GetTemp(0).AsRegister<Register>();
-    Register shift_right = locations->GetTemp(1).AsRegister<Register>();
-    Label end;
-    Label right;
-
-    __ and_(shift_left, rhs.AsRegister<Register>(), ShifterOperand(0x1F));
-    __ Lsrs(shift_right, rhs.AsRegister<Register>(), 6);
-    __ rsb(shift_right, shift_left, ShifterOperand(0x20), AL, kCcKeep);
-
-    if (is_left) {
-      __ b(&right, CS);
-    } else {
-      __ b(&right, CC);
-      std::swap(shift_left, shift_right);
-    }
-
-    // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
-    // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
-    __ Lsl(out_reg_hi, in_reg_hi, shift_left);
-    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
-    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
-    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
-    __ Lsr(shift_left, in_reg_hi, shift_right);
-    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left));
-    __ b(&end);
-
-    // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
-    // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
-    __ Bind(&right);
-    __ Lsr(out_reg_hi, in_reg_hi, shift_right);
-    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
-    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
-    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
-    __ Lsl(shift_right, in_reg_hi, shift_left);
-    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right));
-
-    __ Bind(&end);
-  }
-}
-
-void IntrinsicLocationsBuilderARM::VisitIntegerRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kNoCall,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitIntegerRotateRight(HInvoke* invoke) {
-  GenIntegerRotate(invoke->GetLocations(), GetAssembler(), false /* is_left */);
-}
-
-void IntrinsicLocationsBuilderARM::VisitLongRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kNoCall,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  if (invoke->InputAt(1)->IsConstant()) {
-    locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant()));
-  } else {
-    locations->SetInAt(1, Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
-  }
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitLongRotateRight(HInvoke* invoke) {
-  GenLongRotate(invoke->GetLocations(), GetAssembler(), false /* is_left */);
-}
-
-void IntrinsicLocationsBuilderARM::VisitIntegerRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kNoCall,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitIntegerRotateLeft(HInvoke* invoke) {
-  GenIntegerRotate(invoke->GetLocations(), GetAssembler(), true /* is_left */);
-}
-
-void IntrinsicLocationsBuilderARM::VisitLongRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kNoCall,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  if (invoke->InputAt(1)->IsConstant()) {
-    locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant()));
-  } else {
-    locations->SetInAt(1, Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
-  }
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitLongRotateLeft(HInvoke* invoke) {
-  GenLongRotate(invoke->GetLocations(), GetAssembler(), true /* is_left */);
-}
-
 static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
   Location in = locations->InAt(0);
   Location out = locations->Out();
@@ -429,7 +257,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderARM::VisitMathAbsFloat(HInvoke* invoke) {
@@ -437,7 +265,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
@@ -486,7 +314,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathAbsInt(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 
@@ -495,7 +323,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathAbsLong(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 static void GenMinMax(LocationSummary* locations,
@@ -526,7 +354,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathMinIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderARM::VisitMathMaxIntInt(HInvoke* invoke) {
@@ -534,7 +362,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathMaxIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderARM::VisitMathSqrt(HInvoke* invoke) {
@@ -742,22 +570,22 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeGetLong(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeGetObject(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 
 static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
@@ -787,31 +615,34 @@
 }
 
 void IntrinsicLocationsBuilderARM::VisitUnsafePut(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, true, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutObject(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, true, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutLong(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke);
+  CreateIntIntIntIntToVoid(
+      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke);
+  CreateIntIntIntIntToVoid(
+      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, true, invoke);
+  CreateIntIntIntIntToVoid(
+      arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke);
 }
 
 static void GenUnsafePut(LocationSummary* locations,
@@ -873,31 +704,67 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitUnsafePut(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutObject(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutLong(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
@@ -958,8 +825,15 @@
   Label loop_head;
   __ Bind(&loop_head);
 
+  // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+  // the reference stored in the object before attempting the CAS,
+  // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+  // implementation.
+  //
+  // Note that this code is not (yet) used when read barriers are
+  // enabled (see IntrinsicLocationsBuilderARM::VisitUnsafeCASObject).
+  DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
   __ ldrex(tmp_lo, tmp_ptr);
-  // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
 
   __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo));
 
@@ -985,15 +859,17 @@
   CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) {
-  // The UnsafeCASObject intrinsic does not always work when heap
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS below).
+  //
+  // Also, the UnsafeCASObject intrinsic does not always work when heap
   // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
-  // off temporarily as a quick fix.
+  // off temporarily as a quick fix (b/26204023).
   //
-  // TODO(rpl): Fix it and turn it back on.
-  //
-  // TODO(rpl): Also, we should investigate whether we need a read
-  // barrier in the generated code.
-  if (kPoisonHeapReferences) {
+  // TODO(rpl): Fix these two issues and re-enable this intrinsic.
+  if (kEmitCompilerReadBarrier || kPoisonHeapReferences) {
     return;
   }
 
@@ -1245,7 +1121,8 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitStringIndexOf(HInvoke* invoke) {
-  GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+  GenerateVisitStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
 }
 
 void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) {
@@ -1265,7 +1142,8 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) {
-  GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+  GenerateVisitStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
 }
 
 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
@@ -1644,7 +1522,7 @@
                        temp2,
                        dest,
                        Register(kNoRegister),
-                       false);
+                       /* can_be_null */ false);
 
   __ Bind(slow_path->GetExitLabel());
 }
@@ -1659,8 +1537,12 @@
 
 UNIMPLEMENTED_INTRINSIC(IntegerReverse)
 UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongReverse)
 UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 UNIMPLEMENTED_INTRINSIC(ShortReverseBytes)
 UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat)
@@ -1677,6 +1559,23 @@
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
+UNIMPLEMENTED_INTRINSIC(MathCos)
+UNIMPLEMENTED_INTRINSIC(MathSin)
+UNIMPLEMENTED_INTRINSIC(MathAcos)
+UNIMPLEMENTED_INTRINSIC(MathAsin)
+UNIMPLEMENTED_INTRINSIC(MathAtan)
+UNIMPLEMENTED_INTRINSIC(MathAtan2)
+UNIMPLEMENTED_INTRINSIC(MathCbrt)
+UNIMPLEMENTED_INTRINSIC(MathCosh)
+UNIMPLEMENTED_INTRINSIC(MathExp)
+UNIMPLEMENTED_INTRINSIC(MathExpm1)
+UNIMPLEMENTED_INTRINSIC(MathHypot)
+UNIMPLEMENTED_INTRINSIC(MathLog)
+UNIMPLEMENTED_INTRINSIC(MathLog10)
+UNIMPLEMENTED_INTRINSIC(MathNextAfter)
+UNIMPLEMENTED_INTRINSIC(MathSinh)
+UNIMPLEMENTED_INTRINSIC(MathTan)
+UNIMPLEMENTED_INTRINSIC(MathTanh)
 
 #undef UNIMPLEMENTED_INTRINSIC
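
Note: this file's hunks remove the hand-written ARM rotate intrinsics
(GenIntegerRotate/GenLongRotate) and re-register the four Integer/Long Rotate*
entries as unimplemented; the arm64 file below does the same. For reference,
what the removed 32-bit helper computed, as a self-contained C++ sketch
(illustrative, not ART code; the function name is hypothetical):

    #include <cstdint>
    // A right rotation by d; as the deleted comments note, left rotations
    // were mapped to right rotations by (32 - d), and a zero distance
    // degenerates to a plain move.
    uint32_t RotateRight32(uint32_t x, uint32_t d) {
      d &= 31u;                                    // rotation distance mod 32
      return (x >> d) | (x << ((32u - d) & 31u));  // mask keeps shifts < 32
    }
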
 
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 059abf0..9f6863c 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -143,7 +143,23 @@
 bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
   Dispatch(invoke);
   LocationSummary* res = invoke->GetLocations();
-  return res != nullptr && res->Intrinsified();
+  if (res == nullptr) {
+    return false;
+  }
+  if (kEmitCompilerReadBarrier && res->CanCall()) {
+    // Generating an intrinsic for this HInvoke may produce an
+    // IntrinsicSlowPathARM64 slow path.  Currently this approach
+    // does not work when using read barriers, as the emitted
+    // calling sequence will make use of another slow path
+    // (ReadBarrierForRootSlowPathARM64 for HInvokeStaticOrDirect,
+    // ReadBarrierSlowPathARM64 for HInvokeVirtual).  So we bail
+    // out in this case.
+    //
+    // TODO: Find a way to have intrinsics work with read barriers.
+    invoke->SetLocations(nullptr);
+    return false;
+  }
+  return res->Intrinsified();
 }
 
 #define __ masm->
@@ -186,10 +202,10 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), true, GetVIXLAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
 }
 void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), true, GetVIXLAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
@@ -200,10 +216,10 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), false, GetVIXLAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
 }
 void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), false, GetVIXLAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
 }
 
 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -314,103 +330,6 @@
   GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
 }
 
-static void GenRotateRight(LocationSummary* locations,
-                           Primitive::Type type,
-                           vixl::MacroAssembler* masm) {
-  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
-  Location in = locations->InAt(0);
-  Location out = locations->Out();
-  Operand rhs = OperandFrom(locations->InAt(1), type);
-
-  if (rhs.IsImmediate()) {
-    uint32_t shift = rhs.immediate() & (RegisterFrom(in, type).SizeInBits() - 1);
-    __ Ror(RegisterFrom(out, type),
-           RegisterFrom(in, type),
-           shift);
-  } else {
-    DCHECK(rhs.shift() == vixl::LSL && rhs.shift_amount() == 0);
-    __ Ror(RegisterFrom(out, type),
-           RegisterFrom(in, type),
-           rhs.reg());
-  }
-}
-
-void IntrinsicLocationsBuilderARM64::VisitIntegerRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitIntegerRotateRight(HInvoke* invoke) {
-  GenRotateRight(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitLongRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitLongRotateRight(HInvoke* invoke) {
-  GenRotateRight(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
-}
-
-static void GenRotateLeft(LocationSummary* locations,
-                           Primitive::Type type,
-                           vixl::MacroAssembler* masm) {
-  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
-  Location in = locations->InAt(0);
-  Location out = locations->Out();
-  Operand rhs = OperandFrom(locations->InAt(1), type);
-
-  if (rhs.IsImmediate()) {
-    uint32_t regsize = RegisterFrom(in, type).SizeInBits();
-    uint32_t shift = (regsize - rhs.immediate()) & (regsize - 1);
-    __ Ror(RegisterFrom(out, type), RegisterFrom(in, type), shift);
-  } else {
-    DCHECK(rhs.shift() == vixl::LSL && rhs.shift_amount() == 0);
-    __ Neg(RegisterFrom(out, type),
-           Operand(RegisterFrom(locations->InAt(1), type)));
-    __ Ror(RegisterFrom(out, type),
-           RegisterFrom(in, type),
-           RegisterFrom(out, type));
-  }
-}
-
-void IntrinsicLocationsBuilderARM64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  GenRotateLeft(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitLongRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitLongRotateLeft(HInvoke* invoke) {
-  GenRotateLeft(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
-}
-
 static void GenReverse(LocationSummary* locations,
                        Primitive::Type type,
                        vixl::MacroAssembler* masm) {
@@ -461,7 +380,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetVIXLAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) {
@@ -469,7 +388,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetVIXLAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
 }
 
 static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
@@ -498,7 +417,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), false, GetVIXLAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) {
@@ -506,7 +425,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), true, GetVIXLAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
 }
 
 static void GenMinMaxFP(LocationSummary* locations,
@@ -541,7 +460,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, true, GetVIXLAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
@@ -549,7 +468,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, false, GetVIXLAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
@@ -557,7 +476,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, true, GetVIXLAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
@@ -565,7 +484,8 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, false, GetVIXLAssembler());
+  GenMinMaxFP(
+      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler());
 }
 
 static void GenMinMax(LocationSummary* locations,
@@ -598,7 +518,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, false, GetVIXLAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) {
@@ -606,7 +526,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, true, GetVIXLAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) {
@@ -614,7 +534,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, false, GetVIXLAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) {
@@ -622,7 +542,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, true, GetVIXLAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
@@ -698,7 +618,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
-  GenMathRound(invoke->GetLocations(), true, GetVIXLAssembler());
+  GenMathRound(invoke->GetLocations(), /* is_double */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
@@ -706,7 +626,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
-  GenMathRound(invoke->GetLocations(), false, GetVIXLAssembler());
+  GenMathRound(invoke->GetLocations(), /* is_double */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
@@ -818,9 +738,12 @@
          (type == Primitive::kPrimLong) ||
          (type == Primitive::kPrimNot));
   vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
-  Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
-  Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
-  Register trg = RegisterFrom(locations->Out(), type);
+  Location base_loc = locations->InAt(1);
+  Register base = WRegisterFrom(base_loc);      // Object pointer.
+  Location offset_loc = locations->InAt(2);
+  Register offset = XRegisterFrom(offset_loc);  // Long offset.
+  Location trg_loc = locations->Out();
+  Register trg = RegisterFrom(trg_loc, type);
   bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
 
   MemOperand mem_op(base.X(), offset);
@@ -837,13 +760,18 @@
 
   if (type == Primitive::kPrimNot) {
     DCHECK(trg.IsW());
-    codegen->GetAssembler()->MaybeUnpoisonHeapReference(trg);
+    codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
   }
 }
 
 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           can_call ?
+                                                               LocationSummary::kCallOnSlowPath :
+                                                               LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
@@ -871,22 +799,22 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 
 static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) {
@@ -977,31 +905,67 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
@@ -1057,6 +1021,9 @@
   if (use_acquire_release) {
     __ Bind(&loop_head);
     __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
+    // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
+    // Note that this code is not (yet) used when read barriers are
+    // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
     __ Cmp(tmp_value, expected);
     __ B(&exit_loop, ne);
     __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
@@ -1064,6 +1031,14 @@
   } else {
     __ Dmb(InnerShareable, BarrierWrites);
     __ Bind(&loop_head);
+    // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+    // the reference stored in the object before attempting the CAS,
+    // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+    // implementation.
+    //
+    // Note that this code is not (yet) used when read barriers are
+    // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
+    DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
     __ Ldxr(tmp_value, MemOperand(tmp_ptr));
     __ Cmp(tmp_value, expected);
     __ B(&exit_loop, ne);
@@ -1087,11 +1062,17 @@
   CreateIntIntIntIntIntToInt(arena_, invoke);
 }
 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
-  // The UnsafeCASObject intrinsic does not always work when heap
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS below).
+  //
+  // Also, the UnsafeCASObject intrinsic does not always work when heap
   // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
-  // off temporarily as a quick fix.
-  // TODO(rpl): Fix it and turn it back on.
-  if (kPoisonHeapReferences) {
+  // off temporarily as a quick fix (b/26204023).
+  //
+  // TODO(rpl): Fix these two issues and re-enable this intrinsic.
+  if (kEmitCompilerReadBarrier || kPoisonHeapReferences) {
     return;
   }
 
@@ -1345,7 +1326,8 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
-  GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), true);
+  GenerateVisitStringIndexOf(
+      invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
 }
 
 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
@@ -1365,7 +1347,8 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
-  GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), false);
+  GenerateVisitStringIndexOf(
+      invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
 }
 
 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
@@ -1454,11 +1437,33 @@
 void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 
+UNIMPLEMENTED_INTRINSIC(MathCos)
+UNIMPLEMENTED_INTRINSIC(MathSin)
+UNIMPLEMENTED_INTRINSIC(MathAcos)
+UNIMPLEMENTED_INTRINSIC(MathAsin)
+UNIMPLEMENTED_INTRINSIC(MathAtan)
+UNIMPLEMENTED_INTRINSIC(MathAtan2)
+UNIMPLEMENTED_INTRINSIC(MathCbrt)
+UNIMPLEMENTED_INTRINSIC(MathCosh)
+UNIMPLEMENTED_INTRINSIC(MathExp)
+UNIMPLEMENTED_INTRINSIC(MathExpm1)
+UNIMPLEMENTED_INTRINSIC(MathHypot)
+UNIMPLEMENTED_INTRINSIC(MathLog)
+UNIMPLEMENTED_INTRINSIC(MathLog10)
+UNIMPLEMENTED_INTRINSIC(MathNextAfter)
+UNIMPLEMENTED_INTRINSIC(MathSinh)
+UNIMPLEMENTED_INTRINSIC(MathTan)
+UNIMPLEMENTED_INTRINSIC(MathTanh)
+
 #undef UNIMPLEMENTED_INTRINSIC
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index 8f1d5e1..96f43a0 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -51,6 +51,23 @@
   V(MathMaxFloatFloat, kStatic, kNeedsEnvironmentOrCache) \
   V(MathMaxLongLong, kStatic, kNeedsEnvironmentOrCache) \
   V(MathMaxIntInt, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathCos, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathSin, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathAcos, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathAsin, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathAtan, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathAtan2, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathCbrt, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathCosh, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathExp, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathExpm1, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathHypot, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathLog, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathLog10, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathNextAfter, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathSinh, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathTan, kStatic, kNeedsEnvironmentOrCache) \
+  V(MathTanh, kStatic, kNeedsEnvironmentOrCache) \
   V(MathSqrt, kStatic, kNeedsEnvironmentOrCache) \
   V(MathCeil, kStatic, kNeedsEnvironmentOrCache) \
   V(MathFloor, kStatic, kNeedsEnvironmentOrCache) \
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index a94e3a8..06fab61 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -43,6 +43,14 @@
   return codegen_->GetGraph()->GetArena();
 }
 
+inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() {
+  return codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
+}
+
+inline bool IntrinsicCodeGeneratorMIPS::IsR6() {
+  return codegen_->GetInstructionSetFeatures().IsR6();
+}
+
 #define __ codegen->GetAssembler()->
 
 static void MoveFromReturnRegister(Location trg,
@@ -138,6 +146,681 @@
 
 #define __ assembler->
 
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) {
+  FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+
+  if (is64bit) {
+    Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+    Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+    __ Mfc1(out_lo, in);
+    __ Mfhc1(out_hi, in);
+  } else {
+    Register out = locations->Out().AsRegister<Register>();
+
+    __ Mfc1(out, in);
+  }
+}
+
+// long java.lang.Double.doubleToRawLongBits(double)
+void IntrinsicLocationsBuilderMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+}
+
+// int java.lang.Float.floatToRawIntBits(float)
+void IntrinsicLocationsBuilderMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+}
+
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) {
+  FRegister out = locations->Out().AsFpuRegister<FRegister>();
+
+  if (is64bit) {
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+
+    __ Mtc1(in_lo, out);
+    __ Mthc1(in_hi, out);
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+
+    __ Mtc1(in, out);
+  }
+}
+
+// double java.lang.Double.longBitsToDouble(long)
+void IntrinsicLocationsBuilderMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+}
+
+// float java.lang.Float.intBitsToFloat(int)
+void IntrinsicLocationsBuilderMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+}
+
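
Note: the MoveFPToInt/MoveIntToFP helpers above implement the raw bit moves behind
Double.doubleToRawLongBits, Float.floatToRawIntBits and their inverses using
Mfc1/Mfhc1 (and Mtc1/Mthc1), splitting 64-bit values across a core register pair on
32-bit MIPS. In portable C++ the operation is a pure bit-for-bit reinterpretation
(sketch, not ART code; the function name is hypothetical):

    #include <cstdint>
    #include <cstring>
    int64_t DoubleToRawLongBits(double d) {
      int64_t bits;
      static_assert(sizeof(bits) == sizeof(d), "double must be 64-bit");
      std::memcpy(&bits, &d, sizeof(bits));  // no value conversion, just bits
      return bits;
    }
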
+static void CreateIntToIntLocations(ArenaAllocator* arena,
+                                    HInvoke* invoke,
+                                    Location::OutputOverlap overlaps = Location::kNoOutputOverlap) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), overlaps);
+}
+
+static void GenReverse(LocationSummary* locations,
+                       Primitive::Type type,
+                       bool isR2OrNewer,
+                       bool isR6,
+                       bool reverseBits,
+                       MipsAssembler* assembler) {
+  DCHECK(type == Primitive::kPrimShort ||
+         type == Primitive::kPrimInt ||
+         type == Primitive::kPrimLong);
+  DCHECK(type != Primitive::kPrimShort || !reverseBits);
+
+  if (type == Primitive::kPrimShort) {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    Register out = locations->Out().AsRegister<Register>();
+
+    if (isR2OrNewer) {
+      __ Wsbh(out, in);
+      __ Seh(out, out);
+    } else {
+      __ Sll(TMP, in, 24);
+      __ Sra(TMP, TMP, 16);
+      __ Sll(out, in, 16);
+      __ Srl(out, out, 24);
+      __ Or(out, out, TMP);
+    }
+  } else if (type == Primitive::kPrimInt) {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    Register out = locations->Out().AsRegister<Register>();
+
+    if (isR2OrNewer) {
+      __ Rotr(out, in, 16);
+      __ Wsbh(out, out);
+    } else {
+      // MIPS32r1
+      // __ Rotr(out, in, 16);
+      __ Sll(TMP, in, 16);
+      __ Srl(out, in, 16);
+      __ Or(out, out, TMP);
+      // __ Wsbh(out, out);
+      __ LoadConst32(AT, 0x00FF00FF);
+      __ And(TMP, out, AT);
+      __ Sll(TMP, TMP, 8);
+      __ Srl(out, out, 8);
+      __ And(out, out, AT);
+      __ Or(out, out, TMP);
+    }
+    if (reverseBits) {
+      if (isR6) {
+        __ Bitswap(out, out);
+      } else {
+        __ LoadConst32(AT, 0x0F0F0F0F);
+        __ And(TMP, out, AT);
+        __ Sll(TMP, TMP, 4);
+        __ Srl(out, out, 4);
+        __ And(out, out, AT);
+        __ Or(out, TMP, out);
+        __ LoadConst32(AT, 0x33333333);
+        __ And(TMP, out, AT);
+        __ Sll(TMP, TMP, 2);
+        __ Srl(out, out, 2);
+        __ And(out, out, AT);
+        __ Or(out, TMP, out);
+        __ LoadConst32(AT, 0x55555555);
+        __ And(TMP, out, AT);
+        __ Sll(TMP, TMP, 1);
+        __ Srl(out, out, 1);
+        __ And(out, out, AT);
+        __ Or(out, TMP, out);
+      }
+    }
+  } else if (type == Primitive::kPrimLong) {
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+    Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+    Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+    if (isR2OrNewer) {
+      __ Rotr(AT, in_hi, 16);
+      __ Rotr(TMP, in_lo, 16);
+      __ Wsbh(out_lo, AT);
+      __ Wsbh(out_hi, TMP);
+    } else {
+      // When calling CreateIntToIntLocations() we promised that the use of
+      // out_lo/out_hi wouldn't overlap with the use of in_lo/in_hi. Be very
+      // careful not to write to out_lo/out_hi until we're completely done
+      // reading from in_lo/in_hi.
+      // __ Rotr(TMP, in_lo, 16);
+      __ Sll(TMP, in_lo, 16);
+      __ Srl(AT, in_lo, 16);
+      __ Or(TMP, TMP, AT);             // Hold in TMP until it's safe
+                                       // to write to out_hi.
+      // __ Rotr(out_lo, in_hi, 16);
+      __ Sll(AT, in_hi, 16);
+      __ Srl(out_lo, in_hi, 16);        // Here we are finally done reading
+                                        // from in_lo/in_hi, so it's okay to
+                                        // write to out_lo/out_hi.
+      __ Or(out_lo, out_lo, AT);
+      // __ Wsbh(out_hi, out_hi);
+      __ LoadConst32(AT, 0x00FF00FF);
+      __ And(out_hi, TMP, AT);
+      __ Sll(out_hi, out_hi, 8);
+      __ Srl(TMP, TMP, 8);
+      __ And(TMP, TMP, AT);
+      __ Or(out_hi, out_hi, TMP);
+      // __ Wsbh(out_lo, out_lo);
+      __ And(TMP, out_lo, AT);  // AT already holds the correct mask value
+      __ Sll(TMP, TMP, 8);
+      __ Srl(out_lo, out_lo, 8);
+      __ And(out_lo, out_lo, AT);
+      __ Or(out_lo, out_lo, TMP);
+    }
+    if (reverseBits) {
+      if (isR6) {
+        __ Bitswap(out_hi, out_hi);
+        __ Bitswap(out_lo, out_lo);
+      } else {
+        __ LoadConst32(AT, 0x0F0F0F0F);
+        __ And(TMP, out_hi, AT);
+        __ Sll(TMP, TMP, 4);
+        __ Srl(out_hi, out_hi, 4);
+        __ And(out_hi, out_hi, AT);
+        __ Or(out_hi, TMP, out_hi);
+        __ And(TMP, out_lo, AT);
+        __ Sll(TMP, TMP, 4);
+        __ Srl(out_lo, out_lo, 4);
+        __ And(out_lo, out_lo, AT);
+        __ Or(out_lo, TMP, out_lo);
+        __ LoadConst32(AT, 0x33333333);
+        __ And(TMP, out_hi, AT);
+        __ Sll(TMP, TMP, 2);
+        __ Srl(out_hi, out_hi, 2);
+        __ And(out_hi, out_hi, AT);
+        __ Or(out_hi, TMP, out_hi);
+        __ And(TMP, out_lo, AT);
+        __ Sll(TMP, TMP, 2);
+        __ Srl(out_lo, out_lo, 2);
+        __ And(out_lo, out_lo, AT);
+        __ Or(out_lo, TMP, out_lo);
+        __ LoadConst32(AT, 0x55555555);
+        __ And(TMP, out_hi, AT);
+        __ Sll(TMP, TMP, 1);
+        __ Srl(out_hi, out_hi, 1);
+        __ And(out_hi, out_hi, AT);
+        __ Or(out_hi, TMP, out_hi);
+        __ And(TMP, out_lo, AT);
+        __ Sll(TMP, TMP, 1);
+        __ Srl(out_lo, out_lo, 1);
+        __ And(out_lo, out_lo, AT);
+        __ Or(out_lo, TMP, out_lo);
+      }
+    }
+  }
+}
+
+// int java.lang.Integer.reverseBytes(int)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimInt,
+             IsR2OrNewer(),
+             IsR6(),
+             false,
+             GetAssembler());
+}
+
+// long java.lang.Long.reverseBytes(long)
+void IntrinsicLocationsBuilderMIPS::VisitLongReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimLong,
+             IsR2OrNewer(),
+             IsR6(),
+             false,
+             GetAssembler());
+}
+
+// short java.lang.Short.reverseBytes(short)
+void IntrinsicLocationsBuilderMIPS::VisitShortReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimShort,
+             IsR2OrNewer(),
+             IsR6(),
+             false,
+             GetAssembler());
+}
+
+static void GenNumberOfLeadingZeroes(LocationSummary* locations,
+                                     bool is64bit,
+                                     bool isR6,
+                                     MipsAssembler* assembler) {
+  Register out = locations->Out().AsRegister<Register>();
+  if (is64bit) {
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+
+    if (isR6) {
+      __ ClzR6(AT, in_hi);
+      __ ClzR6(TMP, in_lo);
+      __ Seleqz(TMP, TMP, in_hi);
+    } else {
+      __ ClzR2(AT, in_hi);
+      __ ClzR2(TMP, in_lo);
+      __ Movn(TMP, ZERO, in_hi);
+    }
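+    // AT holds clz(in_hi); TMP holds clz(in_lo) when in_hi is zero and 0
+    // otherwise.  Since clz of a zero word is 32, the sum is the 64-bit
+    // leading-zero count.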
+    __ Addu(out, AT, TMP);
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+
+    if (isR6) {
+      __ ClzR6(out, in);
+    } else {
+      __ ClzR2(out, in);
+    }
+  }
+}
+
+// int java.lang.Integer.numberOfLeadingZeros(int i)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  GenNumberOfLeadingZeroes(invoke->GetLocations(), false, IsR6(), GetAssembler());
+}
+
+// int java.lang.Long.numberOfLeadingZeros(long i)
+void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  GenNumberOfLeadingZeroes(invoke->GetLocations(), true, IsR6(), GetAssembler());
+}
+
+static void GenNumberOfTrailingZeroes(LocationSummary* locations,
+                                      bool is64bit,
+                                      bool isR6,
+                                      bool isR2OrNewer,
+                                      MipsAssembler* assembler) {
+  Register out = locations->Out().AsRegister<Register>();
+  Register in_lo;
+  Register in;
+
+  if (is64bit) {
+    MipsLabel done;
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+
+    in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+
+    // If in_lo is zero, then count the number of trailing zeroes in in_hi;
+    // otherwise count the number of trailing zeroes in in_lo.
+    // out = in_lo ? in_lo : in_hi;
+    if (isR6) {
+      __ Seleqz(out, in_hi, in_lo);
+      __ Selnez(TMP, in_lo, in_lo);
+      __ Or(out, out, TMP);
+    } else {
+      __ Movz(out, in_hi, in_lo);
+      __ Movn(out, in_lo, in_lo);
+    }
+
+    in = out;
+  } else {
+    in = locations->InAt(0).AsRegister<Register>();
+    // Give in_lo a dummy value to keep the compiler from complaining.
+    // Since we only get here in the 32-bit case, this value will never
+    // be used.
+    in_lo = in;
+  }
+
+  // We don't have an instruction to count the number of trailing zeroes.
+  // Start by flipping the bits end-for-end so we can count the number of
+  // leading zeroes instead.
+  if (isR2OrNewer) {
+    __ Rotr(out, in, 16);
+    __ Wsbh(out, out);
+  } else {
+    // MIPS32r1
+    // __ Rotr(out, in, 16);
+    __ Sll(TMP, in, 16);
+    __ Srl(out, in, 16);
+    __ Or(out, out, TMP);
+    // __ Wsbh(out, out);
+    __ LoadConst32(AT, 0x00FF00FF);
+    __ And(TMP, out, AT);
+    __ Sll(TMP, TMP, 8);
+    __ Srl(out, out, 8);
+    __ And(out, out, AT);
+    __ Or(out, out, TMP);
+  }
+
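+  // Reverse the bits so the trailing zeroes become leading zeroes: bitswap
+  // plus clz on R6, otherwise the mask-and-shift ladder used in GenReverse()
+  // followed by clz.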
+  if (isR6) {
+    __ Bitswap(out, out);
+    __ ClzR6(out, out);
+  } else {
+    __ LoadConst32(AT, 0x0F0F0F0F);
+    __ And(TMP, out, AT);
+    __ Sll(TMP, TMP, 4);
+    __ Srl(out, out, 4);
+    __ And(out, out, AT);
+    __ Or(out, TMP, out);
+    __ LoadConst32(AT, 0x33333333);
+    __ And(TMP, out, AT);
+    __ Sll(TMP, TMP, 2);
+    __ Srl(out, out, 2);
+    __ And(out, out, AT);
+    __ Or(out, TMP, out);
+    __ LoadConst32(AT, 0x55555555);
+    __ And(TMP, out, AT);
+    __ Sll(TMP, TMP, 1);
+    __ Srl(out, out, 1);
+    __ And(out, out, AT);
+    __ Or(out, TMP, out);
+    __ ClzR2(out, out);
+  }
+
+  if (is64bit) {
+    // If in_lo is zero, then we counted the number of trailing zeroes in
+    // in_hi, so we must add the number of trailing zeroes in in_lo (32) to
+    // get the correct final count.
+    __ LoadConst32(TMP, 32);
+    if (isR6) {
+      __ Seleqz(TMP, TMP, in_lo);
+    } else {
+      __ Movn(TMP, ZERO, in_lo);
+    }
+    __ Addu(out, out, TMP);
+  }
+}
+
+// int java.lang.Integer.numberOfTrailingZeros(int i)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeroes(invoke->GetLocations(), false, IsR6(), IsR2OrNewer(), GetAssembler());
+}
+
+// int java.lang.Long.numberOfTrailingZeros(long i)
+void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeroes(invoke->GetLocations(), true, IsR6(), IsR2OrNewer(), GetAssembler());
+}
+
+enum RotationDirection {
+  kRotateRight,
+  kRotateLeft,
+};
+
+static void GenRotate(HInvoke* invoke,
+                      Primitive::Type type,
+                      bool isR2OrNewer,
+                      RotationDirection direction,
+                      MipsAssembler* assembler) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  LocationSummary* locations = invoke->GetLocations();
+  if (invoke->InputAt(1)->IsIntConstant()) {
+    int32_t shift = static_cast<int32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue());
+    if (type == Primitive::kPrimInt) {
+      Register in = locations->InAt(0).AsRegister<Register>();
+      Register out = locations->Out().AsRegister<Register>();
+
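+      // A rotate left by n is the same as a rotate right by (32 - n) & 31.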
+      shift &= 0x1F;
+      if (direction == kRotateLeft) {
+        shift = (32 - shift) & 0x1F;
+      }
+
+      if (isR2OrNewer) {
+        if ((shift != 0) || (out != in)) {
+          __ Rotr(out, in, shift);
+        }
+      } else {
+        if (shift == 0) {
+          if (out != in) {
+            __ Move(out, in);
+          }
+        } else {
+          __ Srl(AT, in, shift);
+          __ Sll(out, in, 32 - shift);
+          __ Or(out, out, AT);
+        }
+      }
+    } else {    // Primitive::kPrimLong
+      Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+      Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+      Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+      Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
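+      // A 64-bit rotate left by n is a rotate right by (64 - n) & 63.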
+      shift &= 0x3F;
+      if (direction == kRotateLeft) {
+        shift = (64 - shift) & 0x3F;
+      }
+
+      if (shift == 0) {
+        __ Move(out_lo, in_lo);
+        __ Move(out_hi, in_hi);
+      } else if (shift == 32) {
+        __ Move(out_lo, in_hi);
+        __ Move(out_hi, in_lo);
+      } else if (shift < 32) {
+        __ Srl(AT, in_lo, shift);
+        __ Sll(out_lo, in_hi, 32 - shift);
+        __ Or(out_lo, out_lo, AT);
+        __ Srl(AT, in_hi, shift);
+        __ Sll(out_hi, in_lo, 32 - shift);
+        __ Or(out_hi, out_hi, AT);
+      } else {
+        __ Sll(AT, in_lo, 64 - shift);
+        __ Srl(out_lo, in_hi, shift - 32);
+        __ Or(out_lo, out_lo, AT);
+        __ Sll(AT, in_hi, 64 - shift);
+        __ Srl(out_hi, in_lo, shift - 32);
+        __ Or(out_hi, out_hi, AT);
+      }
+    }
+  } else {      // !invoke->InputAt(1)->IsIntConstant()
+    Register shamt = locations->InAt(1).AsRegister<Register>();
+    if (type == Primitive::kPrimInt) {
+      Register in = locations->InAt(0).AsRegister<Register>();
+      Register out = locations->Out().AsRegister<Register>();
+
+      if (isR2OrNewer) {
+        if (direction == kRotateRight) {
+          __ Rotrv(out, in, shamt);
+        } else {
+          // negu tmp, shamt
+          __ Subu(TMP, ZERO, shamt);
+          __ Rotrv(out, in, TMP);
+        }
+      } else {
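+        // Without rotrv, synthesize the rotation from two opposing shifts
+        // combined with OR.  The shifters only use the low five bits of the
+        // amount, so -shamt acts as (32 - shamt) & 31; when shamt is 0 both
+        // shifts are by zero and the OR simply reproduces the input.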
+        if (direction == kRotateRight) {
+          __ Srlv(AT, in, shamt);
+          __ Subu(TMP, ZERO, shamt);
+          __ Sllv(out, in, TMP);
+          __ Or(out, out, AT);
+        } else {
+          __ Sllv(AT, in, shamt);
+          __ Subu(TMP, ZERO, shamt);
+          __ Srlv(out, in, TMP);
+          __ Or(out, out, AT);
+        }
+      }
+    } else {    // Primitive::kPrimLong
+      Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+      Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+      Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+      Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+      MipsLabel done;
+
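+      // Rotate each word by shamt mod 32, then swap the two halves below if
+      // bit 5 of shamt is set.  TMP = ~shamt equals 31 - shamt modulo 32, so
+      // a fixed shift by 1 followed by a variable shift by TMP shifts by
+      // (32 - shamt) in total; a single variable shift would reduce 32 - 0
+      // to a shift by zero and break the shamt == 0 case.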
+      if (direction == kRotateRight) {
+        __ Nor(TMP, ZERO, shamt);
+        __ Srlv(AT, in_lo, shamt);
+        __ Sll(out_lo, in_hi, 1);
+        __ Sllv(out_lo, out_lo, TMP);
+        __ Or(out_lo, out_lo, AT);
+        __ Srlv(AT, in_hi, shamt);
+        __ Sll(out_hi, in_lo, 1);
+        __ Sllv(out_hi, out_hi, TMP);
+        __ Or(out_hi, out_hi, AT);
+      } else {
+        __ Nor(TMP, ZERO, shamt);
+        __ Sllv(AT, in_lo, shamt);
+        __ Srl(out_lo, in_hi, 1);
+        __ Srlv(out_lo, out_lo, TMP);
+        __ Or(out_lo, out_lo, AT);
+        __ Sllv(AT, in_hi, shamt);
+        __ Srl(out_hi, in_lo, 1);
+        __ Srlv(out_hi, out_hi, TMP);
+        __ Or(out_hi, out_hi, AT);
+      }
+
+      __ Andi(TMP, shamt, 32);
+      __ Beqz(TMP, &done);
+      __ Move(TMP, out_hi);
+      __ Move(out_hi, out_lo);
+      __ Move(out_lo, TMP);
+
+      __ Bind(&done);
+    }
+  }
+}
+
+// int java.lang.Integer.rotateRight(int i, int distance)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerRotateRight(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerRotateRight(HInvoke* invoke) {
+  GenRotate(invoke, Primitive::kPrimInt, IsR2OrNewer(), kRotateRight, GetAssembler());
+}
+
+// long java.lang.Long.rotateRight(long i, int distance)
+void IntrinsicLocationsBuilderMIPS::VisitLongRotateRight(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongRotateRight(HInvoke* invoke) {
+  GenRotate(invoke, Primitive::kPrimLong, IsR2OrNewer(), kRotateRight, GetAssembler());
+}
+
+// int java.lang.Integer.rotateLeft(int i, int distance)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerRotateLeft(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerRotateLeft(HInvoke* invoke) {
+  GenRotate(invoke, Primitive::kPrimInt, IsR2OrNewer(), kRotateLeft, GetAssembler());
+}
+
+// long java.lang.Long.rotateLeft(long i, int distance)
+void IntrinsicLocationsBuilderMIPS::VisitLongRotateLeft(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongRotateLeft(HInvoke* invoke) {
+  GenRotate(invoke, Primitive::kPrimLong, IsR2OrNewer(), kRotateLeft, GetAssembler());
+}
+
+// int java.lang.Integer.reverse(int)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerReverse(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerReverse(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimInt,
+             IsR2OrNewer(),
+             IsR6(),
+             true,
+             GetAssembler());
+}
+
+// long java.lang.Long.reverse(long)
+void IntrinsicLocationsBuilderMIPS::VisitLongReverse(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongReverse(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimLong,
+             IsR2OrNewer(),
+             IsR6(),
+             true,
+             GetAssembler());
+}
+
 // boolean java.lang.String.equals(Object anObject)
 void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -248,17 +931,6 @@
 void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
-UNIMPLEMENTED_INTRINSIC(IntegerReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverse)
-UNIMPLEMENTED_INTRINSIC(ShortReverseBytes)
-UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
-UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
-UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
-UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
-UNIMPLEMENTED_INTRINSIC(FloatIntBitsToFloat)
-UNIMPLEMENTED_INTRINSIC(DoubleLongBitsToDouble)
-UNIMPLEMENTED_INTRINSIC(FloatFloatToRawIntBits)
-UNIMPLEMENTED_INTRINSIC(DoubleDoubleToRawLongBits)
 UNIMPLEMENTED_INTRINSIC(MathAbsDouble)
 UNIMPLEMENTED_INTRINSIC(MathAbsFloat)
 UNIMPLEMENTED_INTRINSIC(MathAbsInt)
@@ -311,18 +983,29 @@
 UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes)
 UNIMPLEMENTED_INTRINSIC(StringNewStringFromChars)
 UNIMPLEMENTED_INTRINSIC(StringNewStringFromString)
-UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
-UNIMPLEMENTED_INTRINSIC(LongRotateRight)
-UNIMPLEMENTED_INTRINSIC(LongNumberOfTrailingZeros)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
-UNIMPLEMENTED_INTRINSIC(IntegerNumberOfTrailingZeros)
 
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 
+UNIMPLEMENTED_INTRINSIC(MathCos)
+UNIMPLEMENTED_INTRINSIC(MathSin)
+UNIMPLEMENTED_INTRINSIC(MathAcos)
+UNIMPLEMENTED_INTRINSIC(MathAsin)
+UNIMPLEMENTED_INTRINSIC(MathAtan)
+UNIMPLEMENTED_INTRINSIC(MathAtan2)
+UNIMPLEMENTED_INTRINSIC(MathCbrt)
+UNIMPLEMENTED_INTRINSIC(MathCosh)
+UNIMPLEMENTED_INTRINSIC(MathExp)
+UNIMPLEMENTED_INTRINSIC(MathExpm1)
+UNIMPLEMENTED_INTRINSIC(MathHypot)
+UNIMPLEMENTED_INTRINSIC(MathLog)
+UNIMPLEMENTED_INTRINSIC(MathLog10)
+UNIMPLEMENTED_INTRINSIC(MathNextAfter)
+UNIMPLEMENTED_INTRINSIC(MathSinh)
+UNIMPLEMENTED_INTRINSIC(MathTan)
+UNIMPLEMENTED_INTRINSIC(MathTanh)
 #undef UNIMPLEMENTED_INTRINSIC
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h
index c71b3c6..19ad525 100644
--- a/compiler/optimizing/intrinsics_mips.h
+++ b/compiler/optimizing/intrinsics_mips.h
@@ -67,6 +67,9 @@
 #undef INTRINSICS_LIST
 #undef OPTIMIZING_INTRINSICS
 
+  bool IsR2OrNewer();
+  bool IsR6();
+
  private:
   MipsAssembler* GetAssembler();
 
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 3654159..8b45ea7 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -115,7 +115,7 @@
     }
 
     RestoreLiveRegisters(codegen, invoke_->GetLocations());
-    __ B(GetExitLabel());
+    __ Bc(GetExitLabel());
   }
 
   const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS64"; }
@@ -162,7 +162,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 // int java.lang.Float.floatToRawIntBits(float)
@@ -171,7 +171,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -199,7 +199,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 // float java.lang.Float.intBitsToFloat(int)
@@ -208,7 +208,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -290,7 +290,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
-  GenNumberOfLeadingZeroes(invoke->GetLocations(), false, GetAssembler());
+  GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 // int java.lang.Long.numberOfLeadingZeros(long i)
@@ -299,7 +299,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
-  GenNumberOfLeadingZeroes(invoke->GetLocations(), true, GetAssembler());
+  GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 static void GenNumberOfTrailingZeroes(LocationSummary* locations,
@@ -327,7 +327,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
-  GenNumberOfTrailingZeroes(invoke->GetLocations(), false, GetAssembler());
+  GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 // int java.lang.Long.numberOfTrailingZeros(long i)
@@ -336,7 +336,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
-  GenNumberOfTrailingZeroes(invoke->GetLocations(), true, GetAssembler());
+  GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 static void GenRotateRight(HInvoke* invoke,
@@ -525,7 +525,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 // float java.lang.Math.abs(float)
@@ -534,7 +534,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
@@ -566,7 +566,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 // long java.lang.Math.abs(long)
@@ -575,7 +575,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 static void GenMinMaxFP(LocationSummary* locations,
@@ -616,7 +616,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler());
 }
 
 // float java.lang.Math.min(float, float)
@@ -625,7 +625,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler());
 }
 
 // double java.lang.Math.max(double, double)
@@ -634,7 +634,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler());
 }
 
 // float java.lang.Math.max(float, float)
@@ -643,7 +643,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler());
 }
 
 static void GenMinMax(LocationSummary* locations,
@@ -713,7 +713,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
 }
 
 // long java.lang.Math.min(long, long)
@@ -722,7 +722,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
 }
 
 // int java.lang.Math.max(int, int)
@@ -731,7 +731,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
 }
 
 // long java.lang.Math.max(long, long)
@@ -740,7 +740,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
 }
 
 // double java.lang.Math.sqrt(double)
@@ -806,7 +806,7 @@
 
   DCHECK_NE(in, out);
 
-  Label done;
+  Mips64Label done;
 
   // double floor/ceil(double in) {
   //     if in.isNaN || in.isInfinite || in.isZero {
@@ -1045,7 +1045,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 
 // int sun.misc.Unsafe.getIntVolatile(Object o, long offset)
@@ -1054,7 +1054,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 
 // long sun.misc.Unsafe.getLong(Object o, long offset)
@@ -1063,7 +1063,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 
 // long sun.misc.Unsafe.getLongVolatile(Object o, long offset)
@@ -1072,7 +1072,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 
 // Object sun.misc.Unsafe.getObject(Object o, long offset)
@@ -1081,7 +1081,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 
 // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset)
@@ -1090,7 +1090,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 
 static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) {
@@ -1151,7 +1151,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePut(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putOrderedInt(Object o, long offset, int x)
@@ -1160,7 +1164,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putIntVolatile(Object o, long offset, int x)
@@ -1169,7 +1177,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putObject(Object o, long offset, Object x)
@@ -1178,7 +1190,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObject(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putOrderedObject(Object o, long offset, Object x)
@@ -1187,7 +1203,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putObjectVolatile(Object o, long offset, Object x)
@@ -1196,7 +1216,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putLong(Object o, long offset, long x)
@@ -1205,7 +1229,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLong(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putOrderedLong(Object o, long offset, long x)
@@ -1214,7 +1242,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putLongVolatile(Object o, long offset, long x)
@@ -1223,7 +1255,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
@@ -1256,13 +1292,15 @@
   // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
   // result = tmp_value != 0;
 
-  Label loop_head, exit_loop;
+  Mips64Label loop_head, exit_loop;
   __ Daddu(TMP, base, offset);
   __ Sync(0);
   __ Bind(&loop_head);
   if (type == Primitive::kPrimLong) {
     __ Lld(out, TMP);
   } else {
+    // Note: We will need a read barrier here when read barrier support is
+    // added to the MIPS64 back end.
     __ Ll(out, TMP);
   }
   __ Dsubu(out, out, expected);         // If we didn't get the 'expected'
@@ -1418,10 +1456,10 @@
   GpuRegister temp2 = locations->GetTemp(1).AsRegister<GpuRegister>();
   GpuRegister temp3 = locations->GetTemp(2).AsRegister<GpuRegister>();
 
-  Label loop;
-  Label end;
-  Label return_true;
-  Label return_false;
+  Mips64Label loop;
+  Mips64Label end;
+  Mips64Label return_true;
+  Mips64Label return_false;
 
   // Get offsets of count, value, and class fields within a string object.
   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
@@ -1485,7 +1523,7 @@
   // If loop does not result in returning false, we return true.
   __ Bind(&return_true);
   __ LoadConst64(out, 1);
-  __ B(&end);
+  __ Bc(&end);
 
   // Return false and exit the function.
   __ Bind(&return_false);
@@ -1514,7 +1552,7 @@
       // full slow-path down and branch unconditionally.
       slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke);
       codegen->AddSlowPath(slow_path);
-      __ B(slow_path->GetEntryLabel());
+      __ Bc(slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
       return;
     }
@@ -1565,7 +1603,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOf(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
 }
 
 // int java.lang.String.indexOf(int ch, int fromIndex)
@@ -1584,7 +1622,8 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+  GenerateStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
 }
 
 // java.lang.String.String(byte[] bytes)
@@ -1693,6 +1732,24 @@
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 
+UNIMPLEMENTED_INTRINSIC(MathCos)
+UNIMPLEMENTED_INTRINSIC(MathSin)
+UNIMPLEMENTED_INTRINSIC(MathAcos)
+UNIMPLEMENTED_INTRINSIC(MathAsin)
+UNIMPLEMENTED_INTRINSIC(MathAtan)
+UNIMPLEMENTED_INTRINSIC(MathAtan2)
+UNIMPLEMENTED_INTRINSIC(MathCbrt)
+UNIMPLEMENTED_INTRINSIC(MathCosh)
+UNIMPLEMENTED_INTRINSIC(MathExp)
+UNIMPLEMENTED_INTRINSIC(MathExpm1)
+UNIMPLEMENTED_INTRINSIC(MathHypot)
+UNIMPLEMENTED_INTRINSIC(MathLog)
+UNIMPLEMENTED_INTRINSIC(MathLog10)
+UNIMPLEMENTED_INTRINSIC(MathNextAfter)
+UNIMPLEMENTED_INTRINSIC(MathSinh)
+UNIMPLEMENTED_INTRINSIC(MathTan)
+UNIMPLEMENTED_INTRINSIC(MathTanh)
+
 #undef UNIMPLEMENTED_INTRINSIC
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 371588f..8019062 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -138,31 +138,31 @@
 }
 
 void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  CreateFPToIntLocations(arena_, invoke, true);
+  CreateFPToIntLocations(arena_, invoke, /* is64bit */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  CreateIntToFPLocations(arena_, invoke, true);
+  CreateIntToFPLocations(arena_, invoke, /* is64bit */ true);
 }
 
 void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  CreateFPToIntLocations(arena_, invoke, false);
+  CreateFPToIntLocations(arena_, invoke, /* is64bit */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  CreateIntToFPLocations(arena_, invoke, false);
+  CreateIntToFPLocations(arena_, invoke, /* is64bit */ false);
 }
 
 void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -298,7 +298,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
@@ -306,7 +306,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
@@ -490,7 +490,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
@@ -498,7 +498,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
@@ -506,7 +506,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
@@ -514,7 +514,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler());
 }
 
 static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
@@ -597,7 +597,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
@@ -605,7 +605,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
@@ -613,7 +613,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
@@ -621,7 +621,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
 }
 
 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -788,6 +788,195 @@
   __ Bind(&done);
 }
 
+static void CreateFPToFPCallLocations(ArenaAllocator* arena,
+                                      HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kCall,
+                                                           kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
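+  // GenFPToFPCall() below leaves the result in XMM0, so pin the output there.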
+  locations->SetOut(Location::FpuRegisterLocation(XMM0));
+}
+
+static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
+  LocationSummary* locations = invoke->GetLocations();
+  DCHECK(locations->WillCall());
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  X86Assembler* assembler = codegen->GetAssembler();
+
+  // We need some place to pass the parameters.
+  __ subl(ESP, Immediate(16));
+  __ cfi().AdjustCFAOffset(16);
+
+  // Pass the parameters at the bottom of the stack.
+  __ movsd(Address(ESP, 0), XMM0);
+
+  // If we have a second parameter, pass it next.
+  if (invoke->GetNumberOfArguments() == 2) {
+    __ movsd(Address(ESP, 8), XMM1);
+  }
+
+  // Now do the actual call.
+  __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(entry)));
+
+  // Extract the return value from the FP stack.
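+  // The runtime routines return the value in x87 st(0), so store it to the
+  // stack and reload it into XMM0, where the compiled code expects it.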
+  __ fstpl(Address(ESP, 0));
+  __ movsd(XMM0, Address(ESP, 0));
+
+  // And clean up the stack.
+  __ addl(ESP, Immediate(16));
+  __ cfi().AdjustCFAOffset(-16);
+
+  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCos);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickSin);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAcos);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAsin);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAtan);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCosh);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickExp);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickLog);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickLog10);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickSinh);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickTan);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickTanh);
+}
+
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
+                                        HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kCall,
+                                                           kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+  locations->SetOut(Location::FpuRegisterLocation(XMM0));
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickHypot);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
+}
+
 void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
   // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -1265,19 +1454,20 @@
 }
 
 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
-  CreateStringIndexOfLocations(invoke, arena_, true);
+  CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
 }
 
 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
 }
 
 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
-  CreateStringIndexOfLocations(invoke, arena_, false);
+  CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
 }
 
 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+  GenerateStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
 }
 
 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
@@ -1600,12 +1790,27 @@
   Location output_loc = locations->Out();
 
   switch (type) {
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
+    case Primitive::kPrimInt: {
       Register output = output_loc.AsRegister<Register>();
       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
-      if (type == Primitive::kPrimNot) {
-        codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+      break;
+    }
+
+    case Primitive::kPrimNot: {
+      Register output = output_loc.AsRegister<Register>();
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          codegen->GenerateArrayLoadWithBakerReadBarrier(
+              invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+        } else {
+          __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+          codegen->GenerateReadBarrierSlow(
+              invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+        }
+      } else {
+        __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+        __ MaybeUnpoisonHeapReference(output);
       }
       break;
     }
@@ -1633,8 +1838,10 @@
   }
 }
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke,
-                                          bool is_long, bool is_volatile) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type,
+                                          bool is_volatile) {
   bool can_call = kEmitCompilerReadBarrier &&
       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
@@ -1646,7 +1853,7 @@
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  if (is_long) {
+  if (type == Primitive::kPrimLong) {
     if (is_volatile) {
       // Need to use XMM to read volatile.
       locations->AddTemp(Location::RequiresFpuRegister());
@@ -1657,45 +1864,50 @@
   } else {
     locations->SetOut(Location::RequiresRegister());
   }
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in InstructionCodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, false, false);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, false, true);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, false, false);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, true, true);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, false, false);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, false, true);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true);
 }
 
 
 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 
 
@@ -1722,31 +1934,40 @@
 }
 
 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, true);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimInt, invoke, /* is_volatile */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, true);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimNot, invoke, /* is_volatile */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, true);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimLong, invoke, /* is_volatile */ true);
 }
 
 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
@@ -1784,7 +2005,7 @@
   }
 
   if (is_volatile) {
-    __ mfence();
+    codegen->MemoryFence();
   }
 
   if (type == Primitive::kPrimNot) {
@@ -1798,31 +2019,31 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 
 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
@@ -1864,6 +2085,17 @@
 }
 
 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented.
+  //
+  // TODO(rpl): Implement a read barrier in GenCAS below and re-enable
+  // this intrinsic.
+  if (kEmitCompilerReadBarrier) {
+    return;
+  }
+
   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
 }
 
@@ -1915,6 +2147,13 @@
       __ PoisonHeapReference(value);
     }
 
+    // TODO: Add a read barrier for the reference stored in the object
+    // before attempting the CAS, similar to the one in the
+    // art::Unsafe_compareAndSwapObject JNI implementation.
+    //
+    // Note that this code is not (yet) used when read barriers are
+    // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject).
+    DCHECK(!kEmitCompilerReadBarrier);
     __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
 
     // LOCK CMPXCHG has full barrier semantics, and we don't need
@@ -1924,11 +2163,8 @@
     __ setb(kZero, out.AsRegister<Register>());
     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
 
-    // In the case of the `UnsafeCASObject` intrinsic, accessing an
-    // object in the heap with LOCK CMPXCHG does not require a read
-    // barrier, as we do not keep a reference to this heap location.
-    // However, if heap poisoning is enabled, we need to unpoison the
-    // values that were poisoned earlier.
+    // If heap poisoning is enabled, we need to unpoison the values
+    // that were poisoned earlier.
     if (kPoisonHeapReferences) {
       if (base_equals_value) {
         // `value` has been moved to a temporary register, no need to
@@ -2267,56 +2503,6 @@
   GenTrailingZeros(assembler, invoke, /* is_long */ true);
 }
 
-static void CreateRotateLocations(ArenaAllocator* arena, HInvoke* invoke) {
-  LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  // The shift count needs to be in CL or a constant.
-  locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, invoke->InputAt(1)));
-  locations->SetOut(Location::SameAsFirstInput());
-}
-
-static void GenRotate(X86Assembler* assembler, HInvoke* invoke, bool is_left) {
-  LocationSummary* locations = invoke->GetLocations();
-  Register first_reg = locations->InAt(0).AsRegister<Register>();
-  Location second = locations->InAt(1);
-
-  if (second.IsRegister()) {
-    Register second_reg = second.AsRegister<Register>();
-    if (is_left) {
-      __ roll(first_reg, second_reg);
-    } else {
-      __ rorl(first_reg, second_reg);
-    }
-  } else {
-    Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
-    if (is_left) {
-      __ roll(first_reg, imm);
-    } else {
-      __ rorl(first_reg, imm);
-    }
-  }
-}
-
-void IntrinsicLocationsBuilderX86::VisitIntegerRotateLeft(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitIntegerRotateLeft(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_left */ true);
-}
-
-void IntrinsicLocationsBuilderX86::VisitIntegerRotateRight(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitIntegerRotateRight(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_left */ false);
-}
-
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
@@ -2327,6 +2513,8 @@
 
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
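
The GenUnsafePut hunk above replaces a hard-coded `mfence` with `codegen->MemoryFence()`,
moving the choice of barrier into the code generator so it can vary by CPU. A minimal
sketch of the idea, assuming a `prefers_locked_add_` feature flag that does not appear in
this log (on x86, a locked read-modify-write on the stack is a full barrier and often
cheaper than `mfence`):

    void CodeGeneratorX86::MemoryFence() {
      if (prefers_locked_add_) {
        // Locked add of zero to the top of the stack: a data no-op, but a
        // full StoreLoad barrier, typically faster than mfence.
        __ lock()->addl(Address(ESP, 0), Immediate(0));
      } else {
        __ mfence();
      }
    }

The same substitution appears in the x86-64 file below.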
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 2d9f01b..aa1c109 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -115,10 +115,10 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
@@ -129,10 +129,10 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -230,7 +230,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
@@ -238,7 +238,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
 }
 
 static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
@@ -277,7 +277,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
@@ -285,7 +285,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 static void GenMinMaxFP(LocationSummary* locations,
@@ -388,7 +388,8 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
+  GenMinMaxFP(
+      invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
@@ -396,7 +397,8 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
+  GenMinMaxFP(
+      invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
@@ -404,7 +406,8 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
+  GenMinMaxFP(
+      invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
@@ -412,7 +415,8 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
+  GenMinMaxFP(
+      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_);
 }
 
 static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
@@ -461,7 +465,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
@@ -469,7 +473,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
@@ -477,7 +481,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
@@ -485,7 +489,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
 }
 
 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -690,7 +694,7 @@
   __ j(kUnordered, &nan);
 
   // output = double-to-long-truncate(input)
-  __ cvttsd2si(out, inPlusPointFive, true);
+  __ cvttsd2si(out, inPlusPointFive, /* is64bit */ true);
   __ jmp(&done);
   __ Bind(&nan);
 
@@ -699,6 +703,188 @@
   __ Bind(&done);
 }
 
+static void CreateFPToFPCallLocations(ArenaAllocator* arena,
+                                      HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kCall,
+                                                           kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetOut(Location::FpuRegisterLocation(XMM0));
+
+  // The native code must not clobber the XMM registers which are non-volatile
+  // for ART, but volatile for native calls.  Adding them as temporaries ensures
+  // they are saved in the prologue and properly restored.
+  for (auto fp_reg : non_volatile_xmm_regs) {
+    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
+  }
+}
+
+static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
+                          QuickEntrypointEnum entry) {
+  LocationSummary* locations = invoke->GetLocations();
+  DCHECK(locations->WillCall());
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  X86_64Assembler* assembler = codegen->GetAssembler();
+
+  __ gs()->call(Address::Absolute(GetThreadOffset<kX86_64WordSize>(entry), /* no_rip */ true));
+  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCos);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickSin);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAcos);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAsin);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAtan);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickCosh);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickExp);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickLog);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickLog10);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickSinh);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickTan);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickTanh);
+}
+
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
+                                        HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kCall,
+                                                           kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+  locations->SetOut(Location::FpuRegisterLocation(XMM0));
+
+  // The native code must not clobber the XMM registers which are non-volatile
+  // for ART, but volatile for native calls.  Adding them as temporaries ensures
+  // they are saved in the prologue and properly restored.
+  for (auto fp_reg : non_volatile_xmm_regs) {
+    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickHypot);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
+}
+
 void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
   // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -1152,7 +1338,7 @@
                        temp2,
                        dest,
                        CpuRegister(kNoRegister),
-                       false);
+                       /* value_can_be_null */ false);
 
   __ Bind(slow_path->GetExitLabel());
 }
@@ -1180,8 +1366,8 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ gs()->call(Address::Absolute(
-        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
+  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo),
+                                  /* no_rip */ true));
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -1372,7 +1558,7 @@
     // Ensure we have a start index >= 0.
     __ xorl(counter, counter);
     __ cmpl(start_index, Immediate(0));
-    __ cmov(kGreater, counter, start_index, false);  // 32-bit copy is enough.
+    __ cmov(kGreater, counter, start_index, /* is64bit */ false);  // 32-bit copy is enough.
 
     // Move to the start of the string: string_obj + value_offset + 2 * start_index.
     __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
@@ -1409,19 +1595,20 @@
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
-  CreateStringIndexOfLocations(invoke, arena_, true);
+  CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
-  CreateStringIndexOfLocations(invoke, arena_, false);
+  CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+  GenerateStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
@@ -1446,8 +1633,8 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ gs()->call(Address::Absolute(
-        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
+  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes),
+                                  /* no_rip */ true));
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -1466,8 +1653,8 @@
 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
   X86_64Assembler* assembler = GetAssembler();
 
-  __ gs()->call(Address::Absolute(
-        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
+  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars),
+                                  /* no_rip */ true));
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
@@ -1490,8 +1677,8 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ gs()->call(Address::Absolute(
-        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
+  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString),
+                                  /* no_rip */ true));
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -1715,7 +1902,8 @@
 
 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
   CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
-  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
+  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(),
+                                                    /* no_rip */ true));
 }
 
 static void GenUnsafeGet(HInvoke* invoke,
@@ -1729,16 +1917,30 @@
   Location offset_loc = locations->InAt(2);
   CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
   Location output_loc = locations->Out();
-  CpuRegister output = locations->Out().AsRegister<CpuRegister>();
+  CpuRegister output = output_loc.AsRegister<CpuRegister>();
 
   switch (type) {
     case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
-      if (type == Primitive::kPrimNot) {
-        codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+      break;
+
+    case Primitive::kPrimNot: {
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          codegen->GenerateArrayLoadWithBakerReadBarrier(
+              invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+        } else {
+          __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+          codegen->GenerateReadBarrierSlow(
+              invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+        }
+      } else {
+        __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+        __ MaybeUnpoisonHeapReference(output);
       }
       break;
+    }
 
     case Primitive::kPrimLong:
       __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
@@ -1750,7 +1952,9 @@
   }
 }
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type) {
   bool can_call = kEmitCompilerReadBarrier &&
       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
@@ -1763,45 +1967,50 @@
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 
 
@@ -1871,7 +2080,7 @@
   }
 
   if (is_volatile) {
-    __ mfence();
+    codegen->MemoryFence();
   }
 
   if (type == Primitive::kPrimNot) {
@@ -1885,31 +2094,31 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 
 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
@@ -1941,6 +2150,17 @@
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented.
+  //
+  // TODO(rpl): Implement a read barrier in GenCAS below and re-enable
+  // this intrinsic.
+  if (kEmitCompilerReadBarrier) {
+    return;
+  }
+
   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
 }
 
@@ -1991,6 +2211,13 @@
       __ PoisonHeapReference(CpuRegister(value_reg));
     }
 
+    // TODO: Add a read barrier for the reference stored in the object
+    // before attempting the CAS, similar to the one in the
+    // art::Unsafe_compareAndSwapObject JNI implementation.
+    //
+    // Note that this code is not (yet) used when read barriers are
+    // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject).
+    DCHECK(!kEmitCompilerReadBarrier);
     __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg));
 
     // LOCK CMPXCHG has full barrier semantics, and we don't need
@@ -2000,11 +2227,8 @@
     __ setcc(kZero, out);
     __ movzxb(out, out);
 
-    // In the case of the `UnsafeCASObject` intrinsic, accessing an
-    // object in the heap with LOCK CMPXCHG does not require a read
-    // barrier, as we do not keep a reference to this heap location.
-    // However, if heap poisoning is enabled, we need to unpoison the
-    // values that were poisoned earlier.
+    // If heap poisoning is enabled, we need to unpoison the values
+    // that were poisoned earlier.
     if (kPoisonHeapReferences) {
       if (base_equals_value) {
         // `value_reg` has been moved to a temporary register, no need
@@ -2289,92 +2513,6 @@
   GenTrailingZeros(assembler, invoke, /* is_long */ true);
 }
 
-static void CreateRotateLocations(ArenaAllocator* arena, HInvoke* invoke) {
-  LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  // The shift count needs to be in CL or a constant.
-  locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, invoke->InputAt(1)));
-  locations->SetOut(Location::SameAsFirstInput());
-}
-
-static void GenRotate(X86_64Assembler* assembler, HInvoke* invoke, bool is_long, bool is_left) {
-  LocationSummary* locations = invoke->GetLocations();
-  CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
-  Location second = locations->InAt(1);
-
-  if (is_long) {
-    if (second.IsRegister()) {
-      CpuRegister second_reg = second.AsRegister<CpuRegister>();
-      if (is_left) {
-        __ rolq(first_reg, second_reg);
-      } else {
-        __ rorq(first_reg, second_reg);
-      }
-    } else {
-      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue);
-      if (is_left) {
-        __ rolq(first_reg, imm);
-      } else {
-        __ rorq(first_reg, imm);
-      }
-    }
-  } else {
-    if (second.IsRegister()) {
-      CpuRegister second_reg = second.AsRegister<CpuRegister>();
-      if (is_left) {
-        __ roll(first_reg, second_reg);
-      } else {
-        __ rorl(first_reg, second_reg);
-      }
-    } else {
-      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
-      if (is_left) {
-        __ roll(first_reg, imm);
-      } else {
-        __ rorl(first_reg, imm);
-      }
-    }
-  }
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_long */ false, /* is_left */ true);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitIntegerRotateRight(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitIntegerRotateRight(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_long */ false, /* is_left */ false);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitLongRotateLeft(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitLongRotateLeft(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_long */ true, /* is_left */ true);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitLongRotateRight(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitLongRotateRight(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_long */ true, /* is_left */ false);
-}
-
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
@@ -2384,6 +2522,10 @@
 }
 
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 
 #undef UNIMPLEMENTED_INTRINSIC
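
The UNIMPLEMENTED_INTRINSIC hunks above show only the macro's first line; its body falls
outside the hunk. The assumed shape, following the convention of these files, is a pair of
empty visitor stubs, so an unimplemented intrinsic simply compiles as a regular invoke:

    #define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
    void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
    }                                                                                       \
    void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
    }

This explains why the rotate intrinsics could be deleted and re-listed as unimplemented:
the calls fall back to the normal invoke path, presumably because rotation is now
recognized elsewhere in the compiler (this log does not show where).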
 
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index c38bbe3..02befc0 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -121,6 +121,8 @@
           // phi in it.
           if (instruction->NeedsEnvironment()) {
             UpdateLoopPhisIn(instruction->GetEnvironment(), loop_info);
+          } else {
+            DCHECK(!instruction->HasEnvironment());
           }
           instruction->MoveBefore(pre_header->GetLastInstruction());
         } else if (instruction->CanThrow()) {
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 2bb769a..9ad003c 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -107,7 +107,7 @@
   BuildLoop();
 
   // Populate the loop with instructions: set/get field with different types.
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
   HInstruction* get_field = new (&allocator_) HInstanceFieldGet(parameter_,
                                                                 Primitive::kPrimLong,
                                                                 MemberOffset(10),
@@ -134,7 +134,7 @@
   BuildLoop();
 
   // Populate the loop with instructions: set/get field with same types.
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
   HInstruction* get_field = new (&allocator_) HInstanceFieldGet(parameter_,
                                                                 Primitive::kPrimLong,
                                                                 MemberOffset(10),
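
In licm_test.cc the change is a rename at two call sites: NullHandle becomes
ScopedNullHandle. The assumed shape of the helper (not shown in this log) is a
stack-scoped Handle<T> that always refers to null, for APIs that require a handle
when the caller has no object:

    template <class T>
    class ScopedNullHandle : public Handle<T> {
     public:
      ScopedNullHandle() : Handle<T>(&null_ref_) {}  // a handle wrapping null
     private:
      StackReference<mirror::Object> null_ref_;      // zero-initialized, i.e. null
    };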
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 5b89cfe..727f2bb 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -119,10 +119,16 @@
       : ref_info_(ref_info),
         offset_(offset),
         index_(index),
-        declaring_class_def_index_(declaring_class_def_index) {
+        declaring_class_def_index_(declaring_class_def_index),
+        value_killed_by_loop_side_effects_(true) {
     DCHECK(ref_info != nullptr);
     DCHECK((offset == kInvalidFieldOffset && index != nullptr) ||
            (offset != kInvalidFieldOffset && index == nullptr));
+    if (ref_info->IsSingleton() && !IsArrayElement()) {
+      // Assume this location's value cannot be killed by loop side effects
+      // until proven otherwise.
+      value_killed_by_loop_side_effects_ = false;
+    }
   }
 
   ReferenceInfo* GetReferenceInfo() const { return ref_info_; }
@@ -139,11 +145,22 @@
     return index_ != nullptr;
   }
 
+  bool IsValueKilledByLoopSideEffects() const {
+    return value_killed_by_loop_side_effects_;
+  }
+
+  void SetValueKilledByLoopSideEffects(bool val) {
+    value_killed_by_loop_side_effects_ = val;
+  }
+
  private:
   ReferenceInfo* const ref_info_;      // reference for instance/static field or array access.
   const size_t offset_;                // offset of static/instance field.
   HInstruction* const index_;          // index of an array element.
   const int16_t declaring_class_def_index_;  // declaring class's def's dex index.
+  bool value_killed_by_loop_side_effects_;   // value of this location may be killed by loop
+                                             // side effects because this location is stored
+                                             // into inside a loop.
 
   DISALLOW_COPY_AND_ASSIGN(HeapLocation);
 };
@@ -335,16 +352,24 @@
     return true;
   }
 
-  ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* ref) {
-    ReferenceInfo* ref_info = FindReferenceInfoOf(ref);
+  ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) {
+    ReferenceInfo* ref_info = FindReferenceInfoOf(instruction);
     if (ref_info == nullptr) {
       size_t pos = ref_info_array_.size();
-      ref_info = new (GetGraph()->GetArena()) ReferenceInfo(ref, pos);
+      ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos);
       ref_info_array_.push_back(ref_info);
     }
     return ref_info;
   }
 
+  void CreateReferenceInfoForReferenceType(HInstruction* instruction) {
+    if (instruction->GetType() != Primitive::kPrimNot) {
+      return;
+    }
+    DCHECK(FindReferenceInfoOf(instruction) == nullptr);
+    GetOrCreateReferenceInfo(instruction);
+  }
+
   HeapLocation* GetOrCreateHeapLocation(HInstruction* ref,
                                         size_t offset,
                                         HInstruction* index,
@@ -362,13 +387,13 @@
     return heap_locations_[heap_location_idx];
   }
 
-  void VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
+  HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
     if (field_info.IsVolatile()) {
       has_volatile_ = true;
     }
     const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex();
     const size_t offset = field_info.GetFieldOffset().SizeValue();
-    GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
+    return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
   }
 
   void VisitArrayAccess(HInstruction* array, HInstruction* index) {
@@ -378,15 +403,20 @@
 
   void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE {
     VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+    CreateReferenceInfoForReferenceType(instruction);
   }
 
   void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+    HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
     has_heap_stores_ = true;
+    if (instruction->GetBlock()->GetLoopInformation() != nullptr) {
+      location->SetValueKilledByLoopSideEffects(true);
+    }
   }
 
   void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
     VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+    CreateReferenceInfoForReferenceType(instruction);
   }
 
   void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
@@ -399,6 +429,7 @@
 
   void VisitArrayGet(HArrayGet* instruction) OVERRIDE {
     VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
+    CreateReferenceInfoForReferenceType(instruction);
   }
 
   void VisitArraySet(HArraySet* instruction) OVERRIDE {
@@ -408,7 +439,23 @@
 
   void VisitNewInstance(HNewInstance* new_instance) OVERRIDE {
     // Any references appearing in the ref_info_array_ so far cannot alias with new_instance.
-    GetOrCreateReferenceInfo(new_instance);
+    CreateReferenceInfoForReferenceType(new_instance);
+  }
+
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitParameterValue(HParameterValue* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
   }
 
   void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) OVERRIDE {
@@ -538,23 +585,26 @@
     HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
     ArenaVector<HInstruction*>& pre_header_heap_values =
         heap_values_for_[pre_header->GetBlockId()];
+    // Inherit the values from pre-header.
+    for (size_t i = 0; i < heap_values.size(); i++) {
+      heap_values[i] = pre_header_heap_values[i];
+    }
+
     // We do a single pass in reverse post order. For loops, use the side effects as a hint
     // to see if the heap values should be killed.
     if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) {
-      for (size_t i = 0; i < pre_header_heap_values.size(); i++) {
-        // heap value is killed by loop side effects, need to keep the last store.
-        KeepIfIsStore(pre_header_heap_values[i]);
-      }
-      if (kIsDebugBuild) {
-        // heap_values should all be kUnknownHeapValue that it is inited with.
-        for (size_t i = 0; i < heap_values.size(); i++) {
-          DCHECK_EQ(heap_values[i], kUnknownHeapValue);
-        }
-      }
-    } else {
-      // Inherit the values from pre-header.
       for (size_t i = 0; i < heap_values.size(); i++) {
-        heap_values[i] = pre_header_heap_values[i];
+        HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
+        ReferenceInfo* ref_info = location->GetReferenceInfo();
+        if (!ref_info->IsSingleton() || location->IsValueKilledByLoopSideEffects()) {
+          // heap value is killed by loop side effects (stored into directly, or due to
+          // aliasing).
+          KeepIfIsStore(pre_header_heap_values[i]);
+          heap_values[i] = kUnknownHeapValue;
+        } else {
+          // A singleton's field that's not stored into inside a loop is invariant throughout
+          // the loop.
+        }
       }
     }
   }
@@ -628,6 +678,16 @@
     }
   }
 
+  static bool IsIntFloatAlias(Primitive::Type type1, Primitive::Type type2) {
+    return (type1 == Primitive::kPrimFloat && type2 == Primitive::kPrimInt) ||
+           (type2 == Primitive::kPrimFloat && type1 == Primitive::kPrimInt);
+  }
+
+  static bool IsLongDoubleAlias(Primitive::Type type1, Primitive::Type type2) {
+    return (type1 == Primitive::kPrimDouble && type2 == Primitive::kPrimLong) ||
+           (type2 == Primitive::kPrimDouble && type1 == Primitive::kPrimLong);
+  }
+
   void VisitGetLocation(HInstruction* instruction,
                         HInstruction* ref,
                         size_t offset,
@@ -659,7 +719,8 @@
     if ((heap_value != kUnknownHeapValue) &&
         // Keep the load due to possible I/F, J/D array aliasing.
         // See b/22538329 for details.
-        (heap_value->GetType() == instruction->GetType())) {
+        !IsIntFloatAlias(heap_value->GetType(), instruction->GetType()) &&
+        !IsLongDoubleAlias(heap_value->GetType(), instruction->GetType())) {
       removed_loads_.push_back(instruction);
       substitute_instructions_for_loads_.push_back(heap_value);
       TryRemovingNullCheck(instruction);
@@ -724,8 +785,11 @@
         if (loop_info != nullptr) {
          // instruction is a store in the loop so the loop must do writes.
           DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite());
+          // If it's a singleton, IsValueKilledByLoopSideEffects() must be true.
+          DCHECK(!ref_info->IsSingleton() ||
+                 heap_location_collector_.GetHeapLocation(idx)->IsValueKilledByLoopSideEffects());
 
-          if (loop_info->IsLoopInvariant(original_ref, false)) {
+          if (loop_info->IsDefinedOutOfTheLoop(original_ref)) {
             DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader()));
             // Keep the store since its value may be needed at the loop header.
             possibly_redundant = false;
@@ -933,8 +997,9 @@
 };
 
 void LoadStoreElimination::Run() {
-  if (graph_->IsDebuggable()) {
+  if (graph_->IsDebuggable() || graph_->HasTryCatch()) {
     // Debugger may set heap values or trigger deoptimization of callers.
+    // Try/catch support not implemented yet.
     // Skip this optimization.
     return;
   }
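
The load_store_elimination.cc changes track, per heap location, whether a loop actually
stores into it (value_killed_by_loop_side_effects_). Previously any loop with writes
killed every heap value at the loop header; now a non-array location of a singleton
survives the loop unless the loop itself writes to it. A C++ analog of the pattern this
enables (illustration only; the pass itself runs on dex code):

    struct Point { int x; };

    int Sum(int* arr, int n) {
      Point p;         // "singleton": its address never escapes
      p.x = 42;        // store before the loop
      int sum = 0;
      for (int i = 0; i < n; ++i) {
        arr[i] = 0;    // the loop writes to memory...
        sum += p.x;    // ...but provably not to p.x, so this load can be
                       // forwarded from the pre-header value (42)
      }
      return sum;
    }

The IsIntFloatAlias/IsLongDoubleAlias checks added to VisitGetLocation serve the opposite
purpose: they keep a load when the previously stored value has the same width but a
different type, since forwarding across such an array alias would silently reinterpret
bits (b/22538329).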
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 0a39ff3..a37298c 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -587,15 +587,8 @@
   return other.blocks_.IsBitSet(header_->GetBlockId());
 }
 
-bool HLoopInformation::IsLoopInvariant(HInstruction* instruction, bool must_dominate) const {
-  HLoopInformation* other_loop = instruction->GetBlock()->GetLoopInformation();
-  if (other_loop != this && (other_loop == nullptr || !other_loop->IsIn(*this))) {
-    if (must_dominate) {
-      return instruction->GetBlock()->Dominates(GetHeader());
-    }
-    return true;
-  }
-  return false;
+bool HLoopInformation::IsDefinedOutOfTheLoop(HInstruction* instruction) const {
+  return !blocks_.IsBitSet(instruction->GetBlock()->GetBlockId());
 }
 
 size_t HLoopInformation::GetLifetimeEnd() const {
@@ -784,6 +777,10 @@
   user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode());
 }
 
+HInstruction::InstructionKind HInstruction::GetKind() const {
+  return GetKindInternal();
+}
+
 HInstruction* HInstruction::GetNextDisregardingMoves() const {
   HInstruction* next = GetNext();
   while (next != nullptr && next->IsParallelMove()) {
@@ -967,7 +964,7 @@
   visitor->Visit##name(this);                                                  \
 }
 
-FOR_EACH_INSTRUCTION(DEFINE_ACCEPT)
+FOR_EACH_CONCRETE_INSTRUCTION(DEFINE_ACCEPT)
 
 #undef DEFINE_ACCEPT
 
@@ -1177,6 +1174,59 @@
   }
 }
 
+void HInstruction::MoveBeforeFirstUserAndOutOfLoops() {
+  DCHECK(!CanThrow());
+  DCHECK(!HasSideEffects());
+  DCHECK(!HasEnvironmentUses());
+  DCHECK(HasNonEnvironmentUses());
+  DCHECK(!IsPhi());  // Makes no sense for Phi.
+  DCHECK_EQ(InputCount(), 0u);
+
+  // Find the target block.
+  HUseIterator<HInstruction*> uses_it(GetUses());
+  HBasicBlock* target_block = uses_it.Current()->GetUser()->GetBlock();
+  uses_it.Advance();
+  while (!uses_it.Done() && uses_it.Current()->GetUser()->GetBlock() == target_block) {
+    uses_it.Advance();
+  }
+  if (!uses_it.Done()) {
+    // This instruction has uses in two or more blocks. Find the common dominator.
+    CommonDominator finder(target_block);
+    for (; !uses_it.Done(); uses_it.Advance()) {
+      finder.Update(uses_it.Current()->GetUser()->GetBlock());
+    }
+    target_block = finder.Get();
+    DCHECK(target_block != nullptr);
+  }
+  // Move to the first dominator not in a loop.
+  while (target_block->IsInLoop()) {
+    target_block = target_block->GetDominator();
+    DCHECK(target_block != nullptr);
+  }
+
+  // Find insertion position.
+  HInstruction* insert_pos = nullptr;
+  for (HUseIterator<HInstruction*> uses_it2(GetUses()); !uses_it2.Done(); uses_it2.Advance()) {
+    if (uses_it2.Current()->GetUser()->GetBlock() == target_block &&
+        (insert_pos == nullptr || uses_it2.Current()->GetUser()->StrictlyDominates(insert_pos))) {
+      insert_pos = uses_it2.Current()->GetUser();
+    }
+  }
+  if (insert_pos == nullptr) {
+    // No user in `target_block`, insert before the control flow instruction.
+    insert_pos = target_block->GetLastInstruction();
+    DCHECK(insert_pos->IsControlFlow());
+    // Avoid splitting HCondition from HIf to prevent unnecessary materialization.
+    if (insert_pos->IsIf()) {
+      HInstruction* if_input = insert_pos->AsIf()->InputAt(0);
+      if (if_input == insert_pos->GetPrevious()) {
+        insert_pos = if_input;
+      }
+    }
+  }
+  MoveBefore(insert_pos);
+}
+
 HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) {
   DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
   DCHECK_EQ(cursor->GetBlock(), this);
@@ -1414,6 +1464,24 @@
   }
 }
 
+// Should be called on instructions in a dead block in post order. This method
+// assumes `insn` has been removed from all users with the exception of catch
+// phis because of missing exceptional edges in the graph. It removes the
+// instruction from catch phi uses, together with inputs of other catch phis in
+// the catch block at the same index, as these must be dead too.
+static void RemoveUsesOfDeadInstruction(HInstruction* insn) {
+  DCHECK(!insn->HasEnvironmentUses());
+  while (insn->HasNonEnvironmentUses()) {
+    HUseListNode<HInstruction*>* use = insn->GetUses().GetFirst();
+    size_t use_index = use->GetIndex();
+    HBasicBlock* user_block = use->GetUser()->GetBlock();
+    DCHECK(use->GetUser()->IsPhi() && user_block->IsCatchBlock());
+    for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+      phi_it.Current()->AsPhi()->RemoveInputAt(use_index);
+    }
+  }
+}
+
 void HBasicBlock::DisconnectAndDelete() {
   // Dominators must be removed after all the blocks they dominate. This way
   // a loop header is removed last, a requirement for correct loop information
@@ -1516,21 +1584,13 @@
   //     graph will always remain consistent.
   for (HBackwardInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) {
     HInstruction* insn = it.Current();
-    while (insn->HasUses()) {
-      DCHECK(IsTryBlock());
-      HUseListNode<HInstruction*>* use = insn->GetUses().GetFirst();
-      size_t use_index = use->GetIndex();
-      HBasicBlock* user_block =  use->GetUser()->GetBlock();
-      DCHECK(use->GetUser()->IsPhi() && user_block->IsCatchBlock());
-      for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
-        phi_it.Current()->AsPhi()->RemoveInputAt(use_index);
-      }
-    }
-
+    RemoveUsesOfDeadInstruction(insn);
     RemoveInstruction(insn);
   }
   for (HInstructionIterator it(GetPhis()); !it.Done(); it.Advance()) {
-    RemovePhi(it.Current()->AsPhi());
+    HPhi* insn = it.Current()->AsPhi();
+    RemoveUsesOfDeadInstruction(insn);
+    RemovePhi(insn);
   }
 
   // Disconnect from the dominator.
@@ -1890,7 +1950,7 @@
  *             |
  *          if_block
  *           /    \
- *  dummy_block   deopt_block
+ *  true_block   false_block
  *           \    /
  *       new_pre_header
  *             |
@@ -1898,62 +1958,73 @@
  */
 void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) {
   DCHECK(header->IsLoopHeader());
-  HBasicBlock* pre_header = header->GetDominator();
+  HBasicBlock* old_pre_header = header->GetDominator();
 
-  // Need this to avoid critical edge.
+  // Need extra block to avoid critical edge.
   HBasicBlock* if_block = new (arena_) HBasicBlock(this, header->GetDexPc());
-  // Need this to avoid critical edge.
-  HBasicBlock* dummy_block = new (arena_) HBasicBlock(this, header->GetDexPc());
-  HBasicBlock* deopt_block = new (arena_) HBasicBlock(this, header->GetDexPc());
+  HBasicBlock* true_block = new (arena_) HBasicBlock(this, header->GetDexPc());
+  HBasicBlock* false_block = new (arena_) HBasicBlock(this, header->GetDexPc());
   HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc());
   AddBlock(if_block);
-  AddBlock(dummy_block);
-  AddBlock(deopt_block);
+  AddBlock(true_block);
+  AddBlock(false_block);
   AddBlock(new_pre_header);
 
-  header->ReplacePredecessor(pre_header, new_pre_header);
-  pre_header->successors_.clear();
-  pre_header->dominated_blocks_.clear();
+  header->ReplacePredecessor(old_pre_header, new_pre_header);
+  old_pre_header->successors_.clear();
+  old_pre_header->dominated_blocks_.clear();
 
-  pre_header->AddSuccessor(if_block);
-  if_block->AddSuccessor(dummy_block);  // True successor
-  if_block->AddSuccessor(deopt_block);  // False successor
-  dummy_block->AddSuccessor(new_pre_header);
-  deopt_block->AddSuccessor(new_pre_header);
+  old_pre_header->AddSuccessor(if_block);
+  if_block->AddSuccessor(true_block);  // True successor
+  if_block->AddSuccessor(false_block);  // False successor
+  true_block->AddSuccessor(new_pre_header);
+  false_block->AddSuccessor(new_pre_header);
 
-  pre_header->dominated_blocks_.push_back(if_block);
-  if_block->SetDominator(pre_header);
-  if_block->dominated_blocks_.push_back(dummy_block);
-  dummy_block->SetDominator(if_block);
-  if_block->dominated_blocks_.push_back(deopt_block);
-  deopt_block->SetDominator(if_block);
+  old_pre_header->dominated_blocks_.push_back(if_block);
+  if_block->SetDominator(old_pre_header);
+  if_block->dominated_blocks_.push_back(true_block);
+  true_block->SetDominator(if_block);
+  if_block->dominated_blocks_.push_back(false_block);
+  false_block->SetDominator(if_block);
   if_block->dominated_blocks_.push_back(new_pre_header);
   new_pre_header->SetDominator(if_block);
   new_pre_header->dominated_blocks_.push_back(header);
   header->SetDominator(new_pre_header);
 
+  // Fix reverse post order.
   size_t index_of_header = IndexOfElement(reverse_post_order_, header);
   MakeRoomFor(&reverse_post_order_, 4, index_of_header - 1);
   reverse_post_order_[index_of_header++] = if_block;
-  reverse_post_order_[index_of_header++] = dummy_block;
-  reverse_post_order_[index_of_header++] = deopt_block;
+  reverse_post_order_[index_of_header++] = true_block;
+  reverse_post_order_[index_of_header++] = false_block;
   reverse_post_order_[index_of_header++] = new_pre_header;
 
-  HLoopInformation* info = pre_header->GetLoopInformation();
-  if (info != nullptr) {
-    if_block->SetLoopInformation(info);
-    dummy_block->SetLoopInformation(info);
-    deopt_block->SetLoopInformation(info);
-    new_pre_header->SetLoopInformation(info);
-    for (HLoopInformationOutwardIterator loop_it(*pre_header);
+  // Fix loop information.
+  HLoopInformation* loop_info = old_pre_header->GetLoopInformation();
+  if (loop_info != nullptr) {
+    if_block->SetLoopInformation(loop_info);
+    true_block->SetLoopInformation(loop_info);
+    false_block->SetLoopInformation(loop_info);
+    new_pre_header->SetLoopInformation(loop_info);
+    // Add blocks to all enveloping loops.
+    for (HLoopInformationOutwardIterator loop_it(*old_pre_header);
          !loop_it.Done();
          loop_it.Advance()) {
       loop_it.Current()->Add(if_block);
-      loop_it.Current()->Add(dummy_block);
-      loop_it.Current()->Add(deopt_block);
+      loop_it.Current()->Add(true_block);
+      loop_it.Current()->Add(false_block);
       loop_it.Current()->Add(new_pre_header);
     }
   }
+
+  // Fix try/catch information.
+  TryCatchInformation* try_catch_info = old_pre_header->IsTryBlock()
+      ? old_pre_header->GetTryCatchInformation()
+      : nullptr;
+  if_block->SetTryCatchInformation(try_catch_info);
+  true_block->SetTryCatchInformation(try_catch_info);
+  false_block->SetTryCatchInformation(try_catch_info);
+  new_pre_header->SetTryCatchInformation(try_catch_info);
 }
 
 void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) {
@@ -2068,6 +2139,26 @@
   }
 }
 
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs) {
+  switch (rhs) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+      return os << "string_init";
+    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
+      return os << "recursive";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
+      return os << "direct";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+      return os << "direct_fixup";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      return os << "dex_cache_pc_relative";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
+      return os << "dex_cache_via_method";
+    default:
+      LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
 std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs) {
   switch (rhs) {
     case HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit:
@@ -2077,7 +2168,8 @@
     case HInvokeStaticOrDirect::ClinitCheckRequirement::kNone:
       return os << "none";
     default:
-      return os << "unknown:" << static_cast<int>(rhs);
+      LOG(FATAL) << "Unknown ClinitCheckRequirement: " << static_cast<int>(rhs);
+      UNREACHABLE();
   }
 }
 
@@ -2090,4 +2182,46 @@
   env_uses_.Clear();
 }
 
+// Returns an instruction with the opposite boolean value from 'cond'.
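+// For example, for an HLessThan(a, b) this inserts and returns a new
+// HGreaterThanOrEqual(a, b) before `cursor`; for an HIntConstant 0 or 1 it
+// returns the graph's constant 1 or 0 without inserting anything.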
+HInstruction* HGraph::InsertOppositeCondition(HInstruction* cond, HInstruction* cursor) {
+  ArenaAllocator* allocator = GetArena();
+
+  if (cond->IsCondition() &&
+      !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType())) {
+    // Floating point conditions cannot be reversed this way (NaN makes, e.g.,
+    // the negation of '<' differ from '>='), so they use HBooleanNot below.
+    HInstruction* lhs = cond->InputAt(0);
+    HInstruction* rhs = cond->InputAt(1);
+    HInstruction* replacement = nullptr;
+    switch (cond->AsCondition()->GetOppositeCondition()) {  // get *opposite*
+      case kCondEQ: replacement = new (allocator) HEqual(lhs, rhs); break;
+      case kCondNE: replacement = new (allocator) HNotEqual(lhs, rhs); break;
+      case kCondLT: replacement = new (allocator) HLessThan(lhs, rhs); break;
+      case kCondLE: replacement = new (allocator) HLessThanOrEqual(lhs, rhs); break;
+      case kCondGT: replacement = new (allocator) HGreaterThan(lhs, rhs); break;
+      case kCondGE: replacement = new (allocator) HGreaterThanOrEqual(lhs, rhs); break;
+      case kCondB:  replacement = new (allocator) HBelow(lhs, rhs); break;
+      case kCondBE: replacement = new (allocator) HBelowOrEqual(lhs, rhs); break;
+      case kCondA:  replacement = new (allocator) HAbove(lhs, rhs); break;
+      case kCondAE: replacement = new (allocator) HAboveOrEqual(lhs, rhs); break;
+      default:
+        LOG(FATAL) << "Unexpected condition";
+        UNREACHABLE();
+    }
+    cursor->GetBlock()->InsertInstructionBefore(replacement, cursor);
+    return replacement;
+  } else if (cond->IsIntConstant()) {
+    HIntConstant* int_const = cond->AsIntConstant();
+    if (int_const->IsZero()) {
+      return GetIntConstant(1);
+    } else {
+      DCHECK(int_const->IsOne());
+      return GetIntConstant(0);
+    }
+  } else {
+    HInstruction* replacement = new (allocator) HBooleanNot(cond);
+    cursor->GetBlock()->InsertInstructionBefore(replacement, cursor);
+    return replacement;
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index b68ea0f..db3e969 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -371,6 +371,14 @@
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
+  ArtMethod* GetArtMethod() const { return art_method_; }
+  void SetArtMethod(ArtMethod* method) { art_method_ = method; }
+
+  // Returns an instruction with the opposite boolean value from 'cond'.
+  // The instruction has been inserted into the graph, either as a constant, or
+  // before cursor.
+  HInstruction* InsertOppositeCondition(HInstruction* cond, HInstruction* cursor);
+
  private:
   void FindBackEdges(ArenaBitVector* visited);
   void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
@@ -474,6 +482,11 @@
 
   HCurrentMethod* cached_current_method_;
 
+  // The ArtMethod this graph is for. Note that for AOT, it may be null,
+  // for example for methods whose declaring class could not be resolved
+  // (such as when the superclass could not be found).
+  ArtMethod* art_method_;
+
   friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
@@ -551,11 +564,8 @@
   // Note that `other` *must* be populated before entering this function.
   bool IsIn(const HLoopInformation& other) const;
 
-  // Returns true if instruction is not defined within this loop or any loop nested inside
-  // this loop. If must_dominate is set, only definitions that actually dominate the loop
-  // header can be invariant. Otherwise, any definition outside the loop, including
-  // definitions that appear after the loop, is invariant.
-  bool IsLoopInvariant(HInstruction* instruction, bool must_dominate) const;
+  // Returns true if instruction is not defined within this loop.
+  bool IsDefinedOutOfTheLoop(HInstruction* instruction) const;
 
   const ArenaBitVector& GetBlocks() const { return blocks_; }
 
@@ -1024,7 +1034,6 @@
   M(ClearException, Instruction)                                        \
   M(ClinitCheck, Instruction)                                           \
   M(Compare, BinaryOperation)                                           \
-  M(Condition, BinaryOperation)                                         \
   M(CurrentMethod, Instruction)                                         \
   M(Deoptimize, Instruction)                                            \
   M(Div, BinaryOperation)                                               \
@@ -1057,6 +1066,7 @@
   M(MemoryBarrier, Instruction)                                         \
   M(MonitorOperation, Instruction)                                      \
   M(Mul, BinaryOperation)                                               \
+  M(NativeDebugInfo, Instruction)                                       \
   M(Neg, UnaryOperation)                                                \
   M(NewArray, Instruction)                                              \
   M(NewInstance, Instruction)                                           \
@@ -1072,6 +1082,7 @@
   M(Rem, BinaryOperation)                                               \
   M(Return, Instruction)                                                \
   M(ReturnVoid, Instruction)                                            \
+  M(Ror, BinaryOperation)                                               \
   M(Shl, BinaryOperation)                                               \
   M(Shr, BinaryOperation)                                               \
   M(StaticFieldGet, Instruction)                                        \
@@ -1090,13 +1101,20 @@
   M(UShr, BinaryOperation)                                              \
   M(Xor, BinaryOperation)                                               \
 
+#ifndef ART_ENABLE_CODEGEN_arm
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)                            \
+  M(ArmDexCacheArraysBase, Instruction)
+#endif
 
 #ifndef ART_ENABLE_CODEGEN_arm64
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                          \
-  M(Arm64IntermediateAddress, Instruction)
+  M(Arm64DataProcWithShifterOp, Instruction)                            \
+  M(Arm64IntermediateAddress, Instruction)                              \
+  M(Arm64MultiplyAccumulate, Instruction)
 #endif
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)
@@ -1123,27 +1141,34 @@
   FOR_EACH_CONCRETE_INSTRUCTION_X86(M)                                  \
   FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M)
 
-#define FOR_EACH_INSTRUCTION(M)                                         \
-  FOR_EACH_CONCRETE_INSTRUCTION(M)                                      \
+#define FOR_EACH_ABSTRACT_INSTRUCTION(M)                                \
+  M(Condition, BinaryOperation)                                         \
   M(Constant, Instruction)                                              \
   M(UnaryOperation, Instruction)                                        \
   M(BinaryOperation, Instruction)                                       \
   M(Invoke, Instruction)
 
+#define FOR_EACH_INSTRUCTION(M)                                         \
+  FOR_EACH_CONCRETE_INSTRUCTION(M)                                      \
+  FOR_EACH_ABSTRACT_INSTRUCTION(M)
+
 #define FORWARD_DECLARATION(type, super) class H##type;
 FOR_EACH_INSTRUCTION(FORWARD_DECLARATION)
 #undef FORWARD_DECLARATION
 
 #define DECLARE_INSTRUCTION(type)                                       \
-  InstructionKind GetKind() const OVERRIDE { return k##type; }          \
+  InstructionKind GetKindInternal() const OVERRIDE { return k##type; }  \
   const char* DebugName() const OVERRIDE { return #type; }              \
-  const H##type* As##type() const OVERRIDE { return this; }             \
-  H##type* As##type() OVERRIDE { return this; }                         \
   bool InstructionTypeEquals(HInstruction* other) const OVERRIDE {      \
     return other->Is##type();                                           \
   }                                                                     \
   void Accept(HGraphVisitor* visitor) OVERRIDE
 
+#define DECLARE_ABSTRACT_INSTRUCTION(type)                              \
+  bool Is##type() const { return As##type() != nullptr; }               \
+  const H##type* As##type() const { return this; }                      \
+  H##type* As##type() { return this; }
+
 template <typename T> class HUseList;
 
 template <typename T>
@@ -1626,6 +1651,11 @@
     return holder_;
   }
 
+  bool IsFromInlinedInvoke() const {
+    return GetParent() != nullptr;
+  }
+
  private:
   // Record instructions' use entries of this environment for constant-time removal.
   // It should only be called by HInstruction when a new environment use is added.
@@ -1940,12 +1970,27 @@
   // Move `this` instruction before `cursor`.
   void MoveBefore(HInstruction* cursor);
 
+  // Move `this` before its first user and out of any loops. If there is no
+  // out-of-loop user that dominates all other users, move the instruction
+  // to the end of the out-of-loop common dominator of the users' blocks.
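+  // For example, an instruction defined inside a loop whose only uses are in
+  // a single block after the loop is moved to just before its first user.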
+  //
+  // This can be used only on non-throwing instructions with no side effects that
+  // have at least one use but no environment uses.
+  void MoveBeforeFirstUserAndOutOfLoops();
+
+#define INSTRUCTION_TYPE_CHECK(type, super)                                    \
+  bool Is##type() const;                                                       \
+  const H##type* As##type() const;                                             \
+  H##type* As##type();
+
+  FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
+#undef INSTRUCTION_TYPE_CHECK
+
 #define INSTRUCTION_TYPE_CHECK(type, super)                                    \
   bool Is##type() const { return (As##type() != nullptr); }                    \
   virtual const H##type* As##type() const { return nullptr; }                  \
   virtual H##type* As##type() { return nullptr; }
-
-  FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
+  FOR_EACH_ABSTRACT_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
 #undef INSTRUCTION_TYPE_CHECK
 
   // Returns whether the instruction can be moved within the graph.
@@ -1968,7 +2013,12 @@
   // 2) Their inputs are identical.
   bool Equals(HInstruction* other) const;
 
-  virtual InstructionKind GetKind() const = 0;
+  // TODO: Remove this indirection when the [[pure]] attribute proposal (n3744)
+  // is adopted and implemented by our C++ compiler(s). For now, we need to hide
+  // the virtual function because __attribute__((__pure__)) does not apply its
+  // strong guarantee to virtual functions, which prevents optimizations.
+  InstructionKind GetKind() const PURE;
+  virtual InstructionKind GetKindInternal() const = 0;
 
   virtual size_t ComputeHashCode() const {
     size_t result = GetKind();
@@ -2266,7 +2316,7 @@
 
   virtual uint64_t GetValueAsUint64() const = 0;
 
-  DECLARE_INSTRUCTION(Constant);
+  DECLARE_ABSTRACT_INSTRUCTION(Constant);
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HConstant);
@@ -2437,11 +2487,15 @@
 // Deoptimize to interpreter, upon checking a condition.
 class HDeoptimize : public HTemplateInstruction<1> {
  public:
-  explicit HDeoptimize(HInstruction* cond, uint32_t dex_pc)
+  HDeoptimize(HInstruction* cond, uint32_t dex_pc)
       : HTemplateInstruction(SideEffects::None(), dex_pc) {
     SetRawInputAt(0, cond);
   }
 
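+  // Two deoptimizations guarding the same condition are interchangeable,
+  // which allows GVN to deduplicate repeated guards.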
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
   bool NeedsEnvironment() const OVERRIDE { return true; }
   bool CanThrow() const OVERRIDE { return true; }
 
@@ -2523,7 +2577,7 @@
   virtual HConstant* Evaluate(HIntConstant* x) const = 0;
   virtual HConstant* Evaluate(HLongConstant* x) const = 0;
 
-  DECLARE_INSTRUCTION(UnaryOperation);
+  DECLARE_ABSTRACT_INSTRUCTION(UnaryOperation);
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HUnaryOperation);
@@ -2616,7 +2670,7 @@
   // one. Otherwise it returns null.
   HInstruction* GetLeastConstantLeft() const;
 
-  DECLARE_INSTRUCTION(BinaryOperation);
+  DECLARE_ABSTRACT_INSTRUCTION(BinaryOperation);
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HBinaryOperation);
@@ -2644,7 +2698,7 @@
   // `instruction`, and disregard moves in between.
   bool IsBeforeWhenDisregardMoves(HInstruction* instruction) const;
 
-  DECLARE_INSTRUCTION(Condition);
+  DECLARE_ABSTRACT_INSTRUCTION(Condition);
 
   virtual IfCondition GetCondition() const = 0;
 
@@ -3238,7 +3292,7 @@
   void SetIntrinsic(Intrinsics intrinsic, IntrinsicNeedsEnvironmentOrCache needs_env_or_cache);
 
   bool IsFromInlinedInvoke() const {
-    return GetEnvironment()->GetParent() != nullptr;
+    return GetEnvironment()->IsFromInlinedInvoke();
   }
 
   bool CanThrow() const OVERRIDE { return true; }
@@ -3253,7 +3307,7 @@
 
   bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; }
 
-  DECLARE_INSTRUCTION(Invoke);
+  DECLARE_ABSTRACT_INSTRUCTION(Invoke);
 
  protected:
   HInvoke(ArenaAllocator* arena,
@@ -3406,7 +3460,7 @@
                         MethodReference target_method,
                         DispatchInfo dispatch_info,
                         InvokeType original_invoke_type,
-                        InvokeType invoke_type,
+                        InvokeType optimized_invoke_type,
                         ClinitCheckRequirement clinit_check_requirement)
       : HInvoke(arena,
                 number_of_arguments,
@@ -3420,7 +3474,7 @@
                 dex_pc,
                 method_index,
                 original_invoke_type),
-        invoke_type_(invoke_type),
+        optimized_invoke_type_(optimized_invoke_type),
         clinit_check_requirement_(clinit_check_requirement),
         target_method_(target_method),
         dispatch_info_(dispatch_info) { }
@@ -3434,14 +3488,19 @@
     DCHECK(had_current_method_input || !needs_current_method_input);
 
     if (had_current_method_input && !needs_current_method_input) {
-      DCHECK_EQ(InputAt(GetCurrentMethodInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod());
-      RemoveInputAt(GetCurrentMethodInputIndex());
+      DCHECK_EQ(InputAt(GetSpecialInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod());
+      RemoveInputAt(GetSpecialInputIndex());
     }
     dispatch_info_ = dispatch_info;
   }
 
-  void InsertInputAt(size_t index, HInstruction* input);
-  void RemoveInputAt(size_t index);
+  void AddSpecialInput(HInstruction* input) {
+    // We allow only one special input.
+    DCHECK(!IsStringInit() && !HasCurrentMethodInput());
+    DCHECK(InputCount() == GetSpecialInputIndex() ||
+           (InputCount() == GetSpecialInputIndex() + 1 && IsStaticWithExplicitClinitCheck()));
+    InsertInputAt(GetSpecialInputIndex(), input);
+  }
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
     // We access the method via the dex cache so we can't do an implicit null check.
@@ -3453,13 +3512,24 @@
     return return_type_ == Primitive::kPrimNot && !IsStringInit();
   }
 
-  InvokeType GetInvokeType() const { return invoke_type_; }
+  // Get the index of the special input, if any.
+  //
+  // If the invoke IsStringInit(), it initially has an HFakeString special argument
+  // which is removed by the instruction simplifier; if the invoke HasCurrentMethodInput(),
+  // the "special input" is the current method pointer; otherwise there may be one
+  // platform-specific special input, such as a PC-relative addressing base.
+  uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); }
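+  // For example, an invoke with two arguments has them at inputs 0 and 1,
+  // and its special input, when present, at input 2.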
+
+  InvokeType GetOptimizedInvokeType() const { return optimized_invoke_type_; }
+  void SetOptimizedInvokeType(InvokeType invoke_type) {
+    optimized_invoke_type_ = invoke_type;
+  }
+
   MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; }
   CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; }
   bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; }
   bool NeedsDexCacheOfDeclaringClass() const OVERRIDE;
   bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; }
-  uint32_t GetCurrentMethodInputIndex() const { return GetNumberOfArguments(); }
   bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; }
   bool HasPcRelativeDexCache() const {
     return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative;
@@ -3467,16 +3537,17 @@
   bool HasCurrentMethodInput() const {
     // This function can be called only after the invoke has been fully initialized by the builder.
     if (NeedsCurrentMethodInput(GetMethodLoadKind())) {
-      DCHECK(InputAt(GetCurrentMethodInputIndex())->IsCurrentMethod());
+      DCHECK(InputAt(GetSpecialInputIndex())->IsCurrentMethod());
       return true;
     } else {
-      DCHECK(InputCount() == GetCurrentMethodInputIndex() ||
-             !InputAt(GetCurrentMethodInputIndex())->IsCurrentMethod());
+      DCHECK(InputCount() == GetSpecialInputIndex() ||
+             !InputAt(GetSpecialInputIndex())->IsCurrentMethod());
       return false;
     }
   }
   bool HasDirectCodePtr() const { return GetCodePtrLocation() == CodePtrLocation::kCallDirect; }
   MethodReference GetTargetMethod() const { return target_method_; }
+  void SetTargetMethod(MethodReference method) { target_method_ = method; }
 
   int32_t GetStringInitOffset() const {
     DCHECK(IsStringInit());
@@ -3502,7 +3573,7 @@
 
   // Is this instruction a call to a static method?
   bool IsStatic() const {
-    return GetInvokeType() == kStatic;
+    return GetOriginalInvokeType() == kStatic;
   }
 
   // Remove the HClinitCheck or the replacement HLoadClass (set as last input by
@@ -3571,8 +3642,11 @@
     return input_record;
   }
 
+  void InsertInputAt(size_t index, HInstruction* input);
+  void RemoveInputAt(size_t index);
+
  private:
-  const InvokeType invoke_type_;
+  InvokeType optimized_invoke_type_;
   ClinitCheckRequirement clinit_check_requirement_;
   // The target method may refer to different dex file or method index than the original
   // invoke. This happens for sharpened calls and for calls where a method was redeclared
@@ -3582,6 +3656,7 @@
 
   DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
 };
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs);
 std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs);
 
 class HInvokeVirtual : public HInvoke {
@@ -3637,9 +3712,10 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeInterface);
 };
 
-class HNewInstance : public HExpression<1> {
+class HNewInstance : public HExpression<2> {
  public:
-  HNewInstance(HCurrentMethod* current_method,
+  HNewInstance(HInstruction* cls,
+               HCurrentMethod* current_method,
                uint32_t dex_pc,
                uint16_t type_index,
                const DexFile& dex_file,
@@ -3652,7 +3728,8 @@
         can_throw_(can_throw),
         finalizable_(finalizable),
         entrypoint_(entrypoint) {
-    SetRawInputAt(0, current_method);
+    SetRawInputAt(0, cls);
+    SetRawInputAt(1, current_method);
   }
 
   uint16_t GetTypeIndex() const { return type_index_; }
@@ -3672,6 +3749,10 @@
 
   QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; }
 
+  void SetEntrypoint(QuickEntrypointEnum entrypoint) {
+    entrypoint_ = entrypoint;
+  }
+
   DECLARE_INSTRUCTION(NewInstance);
 
  private:
@@ -3679,7 +3760,7 @@
   const DexFile& dex_file_;
   const bool can_throw_;
   const bool finalizable_;
-  const QuickEntrypointEnum entrypoint_;
+  QuickEntrypointEnum entrypoint_;
 
   DISALLOW_COPY_AND_ASSIGN(HNewInstance);
 };
@@ -4137,6 +4218,44 @@
   DISALLOW_COPY_AND_ASSIGN(HXor);
 };
 
+class HRor : public HBinaryOperation {
+ public:
+  HRor(Primitive::Type result_type, HInstruction* value, HInstruction* distance)
+    : HBinaryOperation(result_type, value, distance) {}
+
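+  // Computes the rotate-right. For example, for 32-bit x = 0x12345678 and
+  // y = 8, the result is (0x12345678u >> 8) | (0x12345678u << 24), i.e.
+  // 0x78123456.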
+  template <typename T, typename U, typename V>
+  T Compute(T x, U y, V max_shift_value) const {
+    static_assert(std::is_same<V, typename std::make_unsigned<T>::type>::value,
+                  "V is not the unsigned integer type corresponding to T");
+    V ux = static_cast<V>(x);
+    if ((y & max_shift_value) == 0) {
+      return static_cast<T>(ux);
+    } else {
+      const V reg_bits = sizeof(T) * 8;
+      return static_cast<T>(ux >> (y & max_shift_value)) |
+                           (x << (reg_bits - (y & max_shift_value)));
+    }
+  }
+
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetLongConstant(
+        Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetLongConstant(
+        Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
+  }
+
+  DECLARE_INSTRUCTION(Ror);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HRor);
+};
+
 // The value of a parameter in this method. Its location depends on
 // the calling convention.
 class HParameterValue : public HExpression<0> {
@@ -4287,9 +4406,13 @@
       : HInstruction(SideEffects::None(), dex_pc),
         inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)),
         reg_number_(reg_number),
-        type_(type),
-        is_live_(false),
+        type_(ToPhiType(type)),
+        // Phis are constructed live and marked dead if conflicting or unused.
+        // Individual steps of SsaBuilder should assume that if a phi has been
+        // marked dead, it can be ignored and will be removed by SsaPhiElimination.
+        is_live_(true),
         can_be_null_(true) {
+    DCHECK_NE(type_, Primitive::kPrimVoid);
   }
 
   // Returns a type equivalent to the given `type`, but that a `HPhi` can hold.
@@ -4750,6 +4873,23 @@
   DISALLOW_COPY_AND_ASSIGN(HSuspendCheck);
 };
 
+// Pseudo-instruction which provides the native debugger with mapping information.
+// It ensures that line numbers and local variable information can be
+// generated at this point.
+class HNativeDebugInfo : public HTemplateInstruction<0> {
+ public:
+  explicit HNativeDebugInfo(uint32_t dex_pc)
+      : HTemplateInstruction<0>(SideEffects::None(), dex_pc) {}
+
+  bool NeedsEnvironment() const OVERRIDE {
+    return true;
+  }
+
+  DECLARE_INSTRUCTION(NativeDebugInfo);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HNativeDebugInfo);
+};
+
 /**
  * Instruction to load a Class object.
  */
@@ -4760,13 +4900,15 @@
              const DexFile& dex_file,
              bool is_referrers_class,
              uint32_t dex_pc,
-             bool needs_access_check)
+             bool needs_access_check,
+             bool is_in_dex_cache)
       : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc),
         type_index_(type_index),
         dex_file_(dex_file),
         is_referrers_class_(is_referrers_class),
         generate_clinit_check_(false),
         needs_access_check_(needs_access_check),
+        is_in_dex_cache_(is_in_dex_cache),
         loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
     // Referrers class should not need access check. We never inline unverified
     // methods so we can't possibly end up in this situation.
@@ -4791,14 +4933,13 @@
   bool CanBeNull() const OVERRIDE { return false; }
 
   bool NeedsEnvironment() const OVERRIDE {
-    // Will call runtime and load the class if the class is not loaded yet.
-    // TODO: finer grain decision.
-    return !is_referrers_class_;
+    return CanCallRuntime();
   }
 
   bool MustGenerateClinitCheck() const {
     return generate_clinit_check_;
   }
+
   void SetMustGenerateClinitCheck(bool generate_clinit_check) {
     // The entrypoint the code generator is going to call does not do
     // clinit of the class.
@@ -4807,7 +4948,9 @@
   }
 
   bool CanCallRuntime() const {
-    return MustGenerateClinitCheck() || !is_referrers_class_ || needs_access_check_;
+    return MustGenerateClinitCheck() ||
+           (!is_referrers_class_ && !is_in_dex_cache_) ||
+           needs_access_check_;
   }
 
   bool NeedsAccessCheck() const {
@@ -4815,8 +4958,6 @@
   }
 
   bool CanThrow() const OVERRIDE {
-    // May call runtime and and therefore can throw.
-    // TODO: finer grain decision.
     return CanCallRuntime();
   }
 
@@ -4838,6 +4979,8 @@
     return SideEffects::CanTriggerGC();
   }
 
+  bool IsInDexCache() const { return is_in_dex_cache_; }
+
   DECLARE_INSTRUCTION(LoadClass);
 
  private:
@@ -4847,7 +4990,8 @@
   // Whether this instruction must generate the initialization check.
   // Used for code generation.
   bool generate_clinit_check_;
-  bool needs_access_check_;
+  const bool needs_access_check_;
+  const bool is_in_dex_cache_;
 
   ReferenceTypeInfo loaded_class_rti_;
 
@@ -4856,9 +5000,13 @@
 
 class HLoadString : public HExpression<1> {
  public:
-  HLoadString(HCurrentMethod* current_method, uint32_t string_index, uint32_t dex_pc)
+  HLoadString(HCurrentMethod* current_method,
+              uint32_t string_index,
+              uint32_t dex_pc,
+              bool is_in_dex_cache)
       : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc),
-        string_index_(string_index) {
+        string_index_(string_index),
+        is_in_dex_cache_(is_in_dex_cache) {
     SetRawInputAt(0, current_method);
   }
 
@@ -4876,6 +5024,7 @@
   bool NeedsEnvironment() const OVERRIDE { return false; }
   bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { return true; }
   bool CanBeNull() const OVERRIDE { return false; }
+  bool IsInDexCache() const { return is_in_dex_cache_; }
 
   static SideEffects SideEffectsForArchRuntimeCalls() {
     return SideEffects::CanTriggerGC();
@@ -4885,6 +5034,7 @@
 
  private:
   const uint32_t string_index_;
+  const bool is_in_dex_cache_;
 
   DISALLOW_COPY_AND_ASSIGN(HLoadString);
 };
@@ -4912,6 +5062,7 @@
     return true;
   }
 
+  bool CanThrow() const OVERRIDE { return true; }
 
   HLoadClass* GetLoadClass() const { return InputAt(0)->AsLoadClass(); }
 
@@ -5524,6 +5675,9 @@
 
 }  // namespace art
 
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "nodes_arm.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "nodes_arm64.h"
 #endif
@@ -5751,6 +5905,18 @@
   return &lhs == &rhs;
 }
 
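+// With the instruction kind cached, each Is##type() below compiles to a
+// single comparison (e.g. IsAdd() becomes GetKind() == kAdd) instead of a
+// virtual As##type() call.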
+#define INSTRUCTION_TYPE_CHECK(type, super)                                    \
+  inline bool HInstruction::Is##type() const { return GetKind() == k##type; }  \
+  inline const H##type* HInstruction::As##type() const {                       \
+    return Is##type() ? down_cast<const H##type*>(this) : nullptr;             \
+  }                                                                            \
+  inline H##type* HInstruction::As##type() {                                   \
+    return Is##type() ? static_cast<H##type*>(this) : nullptr;                 \
+  }
+
+  FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
+#undef INSTRUCTION_TYPE_CHECK
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_H_
diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h
new file mode 100644
index 0000000..6a1dbb9
--- /dev/null
+++ b/compiler/optimizing/nodes_arm.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM_H_
+#define ART_COMPILER_OPTIMIZING_NODES_ARM_H_
+
+namespace art {
+
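+// Base address of the dex cache arrays of a given dex file, shared by all
+// PC-relative dex cache array accesses in a method so that each access can
+// use a non-negative offset from this base.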
+class HArmDexCacheArraysBase : public HExpression<0> {
+ public:
+  explicit HArmDexCacheArraysBase(const DexFile& dex_file)
+      : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc),
+        dex_file_(&dex_file),
+        element_offset_(static_cast<size_t>(-1)) { }
+
+  void UpdateElementOffset(size_t element_offset) {
+    // Use the lowest offset from the requested elements so that all offsets from
+    // this base are non-negative because our assemblers emit negative-offset loads
+    // as a sequence of two or more instructions. (However, positive offsets beyond
+    // 4KiB also require two or more instructions, so this simple heuristic could
+    // be improved for cases where there is a dense cluster of elements far from
+    // the lowest offset. This is expected to be rare enough though, so we choose
+    // not to spend compile time on elaborate calculations.)
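+    // For example, requests for offsets 0x100 and 0x40 leave element_offset_
+    // at 0x40; the two accesses then use base-relative offsets 0xc0 and 0x0.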
+    element_offset_ = std::min(element_offset_, element_offset);
+  }
+
+  const DexFile& GetDexFile() const {
+    return *dex_file_;
+  }
+
+  size_t GetElementOffset() const {
+    return element_offset_;
+  }
+
+  DECLARE_INSTRUCTION(ArmDexCacheArraysBase);
+
+ private:
+  const DexFile* dex_file_;
+  size_t element_offset_;
+
+  DISALLOW_COPY_AND_ASSIGN(HArmDexCacheArraysBase);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_NODES_ARM_H_
diff --git a/compiler/optimizing/nodes_arm64.cc b/compiler/optimizing/nodes_arm64.cc
new file mode 100644
index 0000000..ac2f093
--- /dev/null
+++ b/compiler/optimizing/nodes_arm64.cc
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_arm64.h"
+#include "nodes.h"
+
+namespace art {
+
+using arm64::helpers::CanFitInShifterOperand;
+
+void HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruction,
+                                                           /*out*/OpKind* op_kind,
+                                                           /*out*/int* shift_amount) {
+  DCHECK(CanFitInShifterOperand(instruction));
+  if (instruction->IsShl()) {
+    *op_kind = kLSL;
+    *shift_amount = instruction->AsShl()->GetRight()->AsIntConstant()->GetValue();
+  } else if (instruction->IsShr()) {
+    *op_kind = kASR;
+    *shift_amount = instruction->AsShr()->GetRight()->AsIntConstant()->GetValue();
+  } else if (instruction->IsUShr()) {
+    *op_kind = kLSR;
+    *shift_amount = instruction->AsUShr()->GetRight()->AsIntConstant()->GetValue();
+  } else {
+    DCHECK(instruction->IsTypeConversion());
+    Primitive::Type result_type = instruction->AsTypeConversion()->GetResultType();
+    Primitive::Type input_type = instruction->AsTypeConversion()->GetInputType();
+    int result_size = Primitive::ComponentSize(result_type);
+    int input_size = Primitive::ComponentSize(input_type);
+    int min_size = std::min(result_size, input_size);
+    // This follows the logic in
+    // `InstructionCodeGeneratorARM64::VisitTypeConversion()`.
+    if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
+      // There is actually nothing to do. The register will be used as a W
+      // register, discarding the top bits. This is represented by the default
+      // encoding 'LSL 0'.
+      *op_kind = kLSL;
+      *shift_amount = 0;
+    } else if (result_type == Primitive::kPrimChar ||
+               (input_type == Primitive::kPrimChar && input_size < result_size)) {
+      *op_kind = kUXTH;
+    } else {
+      switch (min_size) {
+        case 1: *op_kind = kSXTB; break;
+        case 2: *op_kind = kSXTH; break;
+        case 4: *op_kind = kSXTW; break;
+        default:
+          LOG(FATAL) << "Unexpected min size " << min_size;
+      }
+    }
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op) {
+  switch (op) {
+    case HArm64DataProcWithShifterOp::kLSL:  return os << "LSL";
+    case HArm64DataProcWithShifterOp::kLSR:  return os << "LSR";
+    case HArm64DataProcWithShifterOp::kASR:  return os << "ASR";
+    case HArm64DataProcWithShifterOp::kUXTB: return os << "UXTB";
+    case HArm64DataProcWithShifterOp::kUXTH: return os << "UXTH";
+    case HArm64DataProcWithShifterOp::kUXTW: return os << "UXTW";
+    case HArm64DataProcWithShifterOp::kSXTB: return os << "SXTB";
+    case HArm64DataProcWithShifterOp::kSXTH: return os << "SXTH";
+    case HArm64DataProcWithShifterOp::kSXTW: return os << "SXTW";
+    default:
+      LOG(FATAL) << "Invalid OpKind " << static_cast<int>(op);
+      UNREACHABLE();
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 885d3a2..18405f2 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -17,8 +17,83 @@
 #ifndef ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
 #define ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
 
+#include "nodes.h"
+
 namespace art {
 
+class HArm64DataProcWithShifterOp : public HExpression<2> {
+ public:
+  enum OpKind {
+    kLSL,   // Logical shift left.
+    kLSR,   // Logical shift right.
+    kASR,   // Arithmetic shift right.
+    kUXTB,  // Unsigned extend byte.
+    kUXTH,  // Unsigned extend half-word.
+    kUXTW,  // Unsigned extend word.
+    kSXTB,  // Signed extend byte.
+    kSXTH,  // Signed extend half-word.
+    kSXTW,  // Signed extend word.
+
+    // Aliases.
+    kFirstShiftOp = kLSL,
+    kLastShiftOp = kASR,
+    kFirstExtensionOp = kUXTB,
+    kLastExtensionOp = kSXTW
+  };
+  HArm64DataProcWithShifterOp(HInstruction* instr,
+                              HInstruction* left,
+                              HInstruction* right,
+                              OpKind op,
+                              // The shift argument is unused if the operation
+                              // is an extension.
+                              int shift = 0,
+                              uint32_t dex_pc = kNoDexPc)
+      : HExpression(instr->GetType(), SideEffects::None(), dex_pc),
+        instr_kind_(instr->GetKind()), op_kind_(op), shift_amount_(shift) {
+    DCHECK(!instr->HasSideEffects());
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other_instr) const OVERRIDE {
+    HArm64DataProcWithShifterOp* other = other_instr->AsArm64DataProcWithShifterOp();
+    return instr_kind_ == other->instr_kind_ &&
+        op_kind_ == other->op_kind_ &&
+        shift_amount_ == other->shift_amount_;
+  }
+
+  static bool IsShiftOp(OpKind op_kind) {
+    return kFirstShiftOp <= op_kind && op_kind <= kLastShiftOp;
+  }
+
+  static bool IsExtensionOp(OpKind op_kind) {
+    return kFirstExtensionOp <= op_kind && op_kind <= kLastExtensionOp;
+  }
+
+  // Find the operation kind and shift amount from a bitfield move instruction.
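+  // For example, an HShl by constant 5 yields (kLSL, 5), and an int-to-short
+  // HTypeConversion yields kSXTH.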
+  static void GetOpInfoFromInstruction(HInstruction* bitfield_op,
+                                       /*out*/OpKind* op_kind,
+                                       /*out*/int* shift_amount);
+
+  InstructionKind GetInstrKind() const { return instr_kind_; }
+  OpKind GetOpKind() const { return op_kind_; }
+  int GetShiftAmount() const { return shift_amount_; }
+
+  DECLARE_INSTRUCTION(Arm64DataProcWithShifterOp);
+
+ private:
+  InstructionKind instr_kind_;
+  OpKind op_kind_;
+  int shift_amount_;
+
+  friend std::ostream& operator<<(std::ostream& os, OpKind op);
+
+  DISALLOW_COPY_AND_ASSIGN(HArm64DataProcWithShifterOp);
+};
+
+std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op);
+
 // This instruction computes an intermediate address pointing in the 'middle' of an object. The
 // result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
 // never used across anything that can trigger GC.
@@ -42,6 +117,40 @@
   DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
 };
 
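+// Multiply-accumulate: computes `accumulator + (mul_left * mul_right)` or
+// `accumulator - (mul_left * mul_right)` depending on op_kind_, mapping to
+// the A64 MADD/MSUB instructions.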
+class HArm64MultiplyAccumulate : public HExpression<3> {
+ public:
+  HArm64MultiplyAccumulate(Primitive::Type type,
+                           InstructionKind op,
+                           HInstruction* accumulator,
+                           HInstruction* mul_left,
+                           HInstruction* mul_right,
+                           uint32_t dex_pc = kNoDexPc)
+      : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) {
+    SetRawInputAt(kInputAccumulatorIndex, accumulator);
+    SetRawInputAt(kInputMulLeftIndex, mul_left);
+    SetRawInputAt(kInputMulRightIndex, mul_right);
+  }
+
+  static constexpr int kInputAccumulatorIndex = 0;
+  static constexpr int kInputMulLeftIndex = 1;
+  static constexpr int kInputMulRightIndex = 2;
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+    return op_kind_ == other->AsArm64MultiplyAccumulate()->op_kind_;
+  }
+
+  InstructionKind GetOpKind() const { return op_kind_; }
+
+  DECLARE_INSTRUCTION(Arm64MultiplyAccumulate);
+
+ private:
+  // Indicates if this is a MADD or MSUB.
+  InstructionKind op_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(HArm64MultiplyAccumulate);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index 34f1fe5..2b0d522 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -25,6 +25,7 @@
 #include "utils/assembler.h"
 #include "utils/arm/assembler_thumb2.h"
 #include "utils/mips/assembler_mips.h"
+#include "utils/mips64/assembler_mips64.h"
 
 #include "optimizing/optimizing_cfi_test_expected.inc"
 
@@ -212,6 +213,34 @@
   Check(kMips, "kMips_adjust", expected_asm, expected_cfi);
 }
 
+TEST_F(OptimizingCFITest, kMips64Adjust) {
+  // One NOP in the forbidden slot, plus (1 << 15) NOPs of total size 1 << 17,
+  // which exceeds the 18-bit signed range of BEQC.
+  static constexpr size_t kNumNops = 1u + (1u << 15);
+  std::vector<uint8_t> expected_asm(
+      expected_asm_kMips64_adjust_head,
+      expected_asm_kMips64_adjust_head + arraysize(expected_asm_kMips64_adjust_head));
+  expected_asm.resize(expected_asm.size() + kNumNops * 4u, 0u);
+  expected_asm.insert(
+      expected_asm.end(),
+      expected_asm_kMips64_adjust_tail,
+      expected_asm_kMips64_adjust_tail + arraysize(expected_asm_kMips64_adjust_tail));
+  std::vector<uint8_t> expected_cfi(
+      expected_cfi_kMips64_adjust,
+      expected_cfi_kMips64_adjust + arraysize(expected_cfi_kMips64_adjust));
+  SetUpFrame(kMips64);
+#define __ down_cast<mips64::Mips64Assembler*>(GetCodeGenerator()->GetAssembler())->
+  mips64::Mips64Label target;
+  __ Beqc(mips64::A1, mips64::A2, &target);
+  // Push the target out of range of BEQC.
+  for (size_t i = 0; i != kNumNops; ++i) {
+    __ Nop();
+  }
+  __ Bind(&target);
+#undef __
+  Finish();
+  Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi);
+}
+
 #endif  // __ANDROID__
 
 }  // namespace art
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index 4571ebf..de85729 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -413,3 +413,57 @@
 // 0x0002007c: nop
 // 0x00020080: .cfi_restore_state
 // 0x00020080: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips64_adjust_head[] = {
+    0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
+    0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7,
+    0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x02, 0x00, 0xA6, 0x60,
+    0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8,
+};
+static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = {
+    0x18, 0x00, 0xBD, 0x67, 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7,
+    0x10, 0x00, 0xB0, 0xDF, 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF,
+    0x28, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips64_adjust[] = {
+    0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
+    0x4C, 0x0E, 0x40, 0x04, 0x14, 0x00, 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28,
+    0x4C, 0xD0, 0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E,
+    0x40,
+};
+// 0x00000000: daddiu r29, r29, -40
+// 0x00000004: .cfi_def_cfa_offset: 40
+// 0x00000004: sd r31, +32(r29)
+// 0x00000008: .cfi_offset: r31 at cfa-8
+// 0x00000008: sd r17, +24(r29)
+// 0x0000000c: .cfi_offset: r17 at cfa-16
+// 0x0000000c: sd r16, +16(r29)
+// 0x00000010: .cfi_offset: r16 at cfa-24
+// 0x00000010: sdc1 f25, +8(r29)
+// 0x00000014: sdc1 f24, +0(r29)
+// 0x00000018: daddiu r29, r29, -24
+// 0x0000001c: .cfi_def_cfa_offset: 64
+// 0x0000001c: sd r4, +0(r29)
+// 0x00000020: bnec r5, r6, 0x0000002c ; +12
+// 0x00000024: auipc r1, 2
+// 0x00000028: jic r1, 12 ; b 0x00020030 ; +131080
+// 0x0000002c: nop
+//             ...
+// 0x0002002c: nop
+// 0x00020030: .cfi_remember_state
+// 0x00020030: daddiu r29, r29, 24
+// 0x00020034: .cfi_def_cfa_offset: 40
+// 0x00020034: ldc1 f24, +0(r29)
+// 0x00020038: ldc1 f25, +8(r29)
+// 0x0002003c: ld r16, +16(r29)
+// 0x00020040: .cfi_restore: r16
+// 0x00020040: ld r17, +24(r29)
+// 0x00020044: .cfi_restore: r17
+// 0x00020044: ld r31, +32(r29)
+// 0x00020048: .cfi_restore: r31
+// 0x00020048: daddiu r29, r29, 40
+// 0x0002004c: .cfi_def_cfa_offset: 0
+// 0x0002004c: jr r31
+// 0x00020050: nop
+// 0x00020054: .cfi_restore_state
+// 0x00020054: .cfi_def_cfa_offset: 64
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 2204921..831b626 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -20,6 +20,10 @@
 #include <stdint.h>
 
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "dex_cache_array_fixups_arm.h"
+#endif
+
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "instruction_simplifier_arm64.h"
 #endif
 
@@ -110,24 +114,23 @@
 class PassObserver : public ValueObject {
  public:
   PassObserver(HGraph* graph,
-               const char* method_name,
                CodeGenerator* codegen,
                std::ostream* visualizer_output,
                CompilerDriver* compiler_driver)
       : graph_(graph),
-        method_name_(method_name),
+        cached_method_name_(),
         timing_logger_enabled_(compiler_driver->GetDumpPasses()),
-        timing_logger_(method_name, true, true),
+        timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true),
         disasm_info_(graph->GetArena()),
         visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()),
         visualizer_(visualizer_output, graph, *codegen),
         graph_in_bad_state_(false) {
     if (timing_logger_enabled_ || visualizer_enabled_) {
-      if (!IsVerboseMethod(compiler_driver, method_name)) {
+      if (!IsVerboseMethod(compiler_driver, GetMethodName())) {
         timing_logger_enabled_ = visualizer_enabled_ = false;
       }
       if (visualizer_enabled_) {
-        visualizer_.PrintHeader(method_name_);
+        visualizer_.PrintHeader(GetMethodName());
         codegen->SetDisassemblyInformation(&disasm_info_);
       }
     }
@@ -135,7 +138,7 @@
 
   ~PassObserver() {
     if (timing_logger_enabled_) {
-      LOG(INFO) << "TIMINGS " << method_name_;
+      LOG(INFO) << "TIMINGS " << GetMethodName();
       LOG(INFO) << Dumpable<TimingLogger>(timing_logger_);
     }
   }
@@ -148,6 +151,14 @@
 
   void SetGraphInBadState() { graph_in_bad_state_ = true; }
 
+  const char* GetMethodName() {
+    // PrettyMethod() is expensive, so we delay calling it until we actually have to.
+    if (cached_method_name_.empty()) {
+      cached_method_name_ = PrettyMethod(graph_->GetMethodIdx(), graph_->GetDexFile());
+    }
+    return cached_method_name_.c_str();
+  }
+
  private:
   void StartPass(const char* pass_name) {
     // Dump graph first, then start timer.
@@ -206,7 +217,8 @@
   }
 
   HGraph* const graph_;
-  const char* method_name_;
+
+  std::string cached_method_name_;
 
   bool timing_logger_enabled_;
   TimingLogger timing_logger_;
@@ -383,10 +395,11 @@
       || instruction_set == kX86_64;
 }
 
-// Read barrier are supported only on ARM, x86 and x86-64 at the moment.
+// Read barriers are supported on ARM, ARM64, x86 and x86-64 at the moment.
 // TODO: Add support for other architectures and remove this function
 static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) {
-  return instruction_set == kThumb2
+  return instruction_set == kArm64
+      || instruction_set == kThumb2
       || instruction_set == kX86
       || instruction_set == kX86_64;
 }
@@ -414,7 +427,7 @@
     return;
   }
   HInliner* inliner = new (graph->GetArena()) HInliner(
-    graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
+      graph, graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
   HOptimization* optimizations[] = { inliner };
 
   RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
@@ -426,6 +439,17 @@
                                  PassObserver* pass_observer) {
   ArenaAllocator* arena = graph->GetArena();
   switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm
+    case kThumb2:
+    case kArm: {
+      arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
+      HOptimization* arm_optimizations[] = {
+        fixups
+      };
+      RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
+      break;
+    }
+#endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64: {
       arm64::InstructionSimplifierArm64* simplifier =
@@ -491,12 +515,13 @@
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
   HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph);
   HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
+  HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce");
   SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
   GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
   LICM* licm = new (arena) LICM(graph, *side_effects);
   LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
-  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, induction);
+  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
   ReferenceTypePropagation* type_propagation =
       new (arena) ReferenceTypePropagation(graph, &handles);
   HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
@@ -506,7 +531,6 @@
       graph, stats, "instruction_simplifier_after_bce");
   InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier_before_codegen");
-
   IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver);
 
   HOptimization* optimizations1[] = {
@@ -518,48 +542,30 @@
     dce1,
     simplify2
   };
-
   RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer);
 
   MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, &handles);
 
-  // TODO: Update passes incompatible with try/catch so we have the same
-  //       pipeline for all methods.
-  if (graph->HasTryCatch()) {
-    HOptimization* optimizations2[] = {
-      boolean_simplify,
-      side_effects,
-      gvn,
-      dce2,
-      // The codegen has a few assumptions that only the instruction simplifier
-      // can satisfy. For example, the code generator does not expect to see a
-      // HTypeConversion from a type to the same type.
-      simplify4,
-    };
-
-    RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
-  } else {
-    HOptimization* optimizations2[] = {
-      // BooleanSimplifier depends on the InstructionSimplifier removing
-      // redundant suspend checks to recognize empty blocks.
-      boolean_simplify,
-      fold2,  // TODO: if we don't inline we can also skip fold2.
-      side_effects,
-      gvn,
-      licm,
-      induction,
-      bce,
-      simplify3,
-      lse,
-      dce2,
-      // The codegen has a few assumptions that only the instruction simplifier
-      // can satisfy. For example, the code generator does not expect to see a
-      // HTypeConversion from a type to the same type.
-      simplify4,
-    };
-
-    RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
-  }
+  HOptimization* optimizations2[] = {
+    // BooleanSimplifier depends on the InstructionSimplifier removing
+    // redundant suspend checks to recognize empty blocks.
+    boolean_simplify,
+    fold2,  // TODO: if we don't inline we can also skip fold2.
+    side_effects,
+    gvn,
+    licm,
+    induction,
+    bce,
+    fold3,  // Evaluates code generated by dynamic BCE.
+    simplify3,
+    lse,
+    dce2,
+    // The codegen has a few assumptions that only the instruction simplifier
+    // can satisfy. For example, the code generator does not expect to see a
+    // HTypeConversion from a type to the same type.
+    simplify4,
+  };
+  RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
 
   RunArchOptimizations(driver->GetInstructionSet(), graph, stats, pass_observer);
   AllocateRegisters(graph, codegen, pass_observer);
@@ -598,8 +604,6 @@
   stack_map.resize(codegen->ComputeStackMapsSize());
   codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()));
 
-  MaybeRecordStat(MethodCompilationStat::kCompiledOptimized);
-
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
       codegen->GetInstructionSet(),
@@ -610,7 +614,7 @@
       codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
-      ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()),
+      ArrayRef<const SrcMapElem>(),
       ArrayRef<const uint8_t>(),  // mapping_table.
       ArrayRef<const uint8_t>(stack_map),
       ArrayRef<const uint8_t>(),  // native_gc_map.
@@ -634,7 +638,6 @@
   ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps));
   codegen->BuildNativeGCMap(&gc_map, *compiler_driver);
 
-  MaybeRecordStat(MethodCompilationStat::kCompiledBaseline);
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
       codegen->GetInstructionSet(),
@@ -645,7 +648,7 @@
       codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
-      ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()),
+      ArrayRef<const SrcMapElem>(),
       AlignVectorSize(mapping_table),
       AlignVectorSize(vmap_table),
       AlignVectorSize(gc_map),
@@ -664,7 +667,6 @@
                                               jobject class_loader,
                                               const DexFile& dex_file,
                                               Handle<mirror::DexCache> dex_cache) const {
-  std::string method_name = PrettyMethod(method_idx, dex_file);
   MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
@@ -728,7 +730,6 @@
       compiler_driver->GetCompilerOptions().GetGenerateDebugInfo());
 
   PassObserver pass_observer(graph,
-                             method_name.c_str(),
                              codegen.get(),
                              visualizer_output_.get(),
                              compiler_driver);
@@ -742,8 +743,8 @@
     ArtMethod* art_method = compiler_driver->ResolveMethod(
         soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type);
     // We may not get a method, for example if its class is erroneous.
-    // TODO: Clean this up, the compiler driver should just pass the ArtMethod to compile.
     if (art_method != nullptr) {
+      graph->SetArtMethod(art_method);
       interpreter_metadata = art_method->GetQuickenedInfo();
     }
   }
@@ -756,7 +757,7 @@
                         interpreter_metadata,
                         dex_cache);
 
-  VLOG(compiler) << "Building " << method_name;
+  VLOG(compiler) << "Building " << pass_observer.GetMethodName();
 
   {
     PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer);
@@ -766,13 +767,14 @@
     }
   }
 
-  VLOG(compiler) << "Optimizing " << method_name;
+  VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName();
   if (run_optimizations_) {
     {
       PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
       if (!graph->TryBuildingSsa()) {
         // We could not transform the graph to SSA, bailout.
-        LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop";
+        LOG(INFO) << "Skipping compilation of " << pass_observer.GetMethodName()
+            << ": it contains a non natural loop";
         MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA);
         pass_observer.SetGraphInBadState();
         return nullptr;
@@ -839,6 +841,7 @@
                    dex_file,
                    dex_cache));
     if (codegen.get() != nullptr) {
+      MaybeRecordStat(MethodCompilationStat::kCompiled);
       if (run_optimizations_) {
         method = EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver);
       } else {
@@ -849,7 +852,7 @@
     if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
       MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
     } else {
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified);
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledVerificationError);
     }
   }
 
@@ -925,6 +928,7 @@
   if (stack_map_data == nullptr) {
     return false;
   }
+  MaybeRecordStat(MethodCompilationStat::kCompiled);
   codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size));
   const void* code = code_cache->CommitCode(
       self,
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 6375cf1..6296eed 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -17,7 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
 #define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
 
-#include <sstream>
+#include <iomanip>
 #include <string>
 #include <type_traits>
 
@@ -27,18 +27,18 @@
 
 enum MethodCompilationStat {
   kAttemptCompilation = 0,
-  kCompiledBaseline,
-  kCompiledOptimized,
+  kCompiled,
   kInlinedInvoke,
   kInstructionSimplifications,
   kInstructionSimplificationsArch,
   kUnresolvedMethod,
   kUnresolvedField,
   kUnresolvedFieldNotAFastAccess,
+  kRemovedCheckedCast,
+  kRemovedDeadInstruction,
+  kRemovedNullCheck,
   kNotCompiledBranchOutsideMethodCode,
   kNotCompiledCannotBuildSSA,
-  kNotCompiledCantAccesType,
-  kNotCompiledClassNotVerified,
   kNotCompiledHugeMethod,
   kNotCompiledLargeMethodNoBranches,
   kNotCompiledMalformedOpcode,
@@ -47,13 +47,12 @@
   kNotCompiledSpaceFilter,
   kNotCompiledUnhandledInstruction,
   kNotCompiledUnsupportedIsa,
+  kNotCompiledVerificationError,
   kNotCompiledVerifyAtRuntime,
-  kNotOptimizedDisabled,
-  kNotOptimizedRegisterAllocator,
-  kNotOptimizedTryCatch,
-  kRemovedCheckedCast,
-  kRemovedDeadInstruction,
-  kRemovedNullCheck,
+  kInlinedMonomorphicCall,
+  kMonomorphicCall,
+  kPolymorphicCall,
+  kMegamorphicCall,
   kLastStat
 };
 
@@ -66,20 +65,19 @@
   }
 
   void Log() const {
+    if (!kIsDebugBuild && !VLOG_IS_ON(compiler)) {
+      // Log only in debug builds or if the compiler is verbose.
+      return;
+    }
+
     if (compile_stats_[kAttemptCompilation] == 0) {
       LOG(INFO) << "Did not compile any method.";
     } else {
-      size_t unoptimized_percent =
-          compile_stats_[kCompiledBaseline] * 100 / compile_stats_[kAttemptCompilation];
-      size_t optimized_percent =
-          compile_stats_[kCompiledOptimized] * 100 / compile_stats_[kAttemptCompilation];
-      std::ostringstream oss;
-      oss << "Attempted compilation of " << compile_stats_[kAttemptCompilation] << " methods: ";
-
-      oss << unoptimized_percent << "% (" << compile_stats_[kCompiledBaseline] << ") unoptimized, ";
-      oss << optimized_percent << "% (" << compile_stats_[kCompiledOptimized] << ") optimized, ";
-
-      LOG(INFO) << oss.str();
+      float compiled_percent =
+          compile_stats_[kCompiled] * 100.0f / compile_stats_[kAttemptCompilation];
+      LOG(INFO) << "Attempted compilation of " << compile_stats_[kAttemptCompilation]
+          << " methods: " << std::fixed << std::setprecision(2)
+          << compiled_percent << "% (" << compile_stats_[kCompiled] << ") compiled.";
 
       for (int i = 0; i < kLastStat; i++) {
         if (compile_stats_[i] != 0) {
@@ -92,41 +90,42 @@
 
  private:
   std::string PrintMethodCompilationStat(MethodCompilationStat stat) const {
+    std::string name;
     switch (stat) {
-      case kAttemptCompilation : return "kAttemptCompilation";
-      case kCompiledBaseline : return "kCompiledBaseline";
-      case kCompiledOptimized : return "kCompiledOptimized";
-      case kInlinedInvoke : return "kInlinedInvoke";
-      case kInstructionSimplifications: return "kInstructionSimplifications";
-      case kInstructionSimplificationsArch: return "kInstructionSimplificationsArch";
-      case kUnresolvedMethod : return "kUnresolvedMethod";
-      case kUnresolvedField : return "kUnresolvedField";
-      case kUnresolvedFieldNotAFastAccess : return "kUnresolvedFieldNotAFastAccess";
-      case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode";
-      case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA";
-      case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
-      case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
-      case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod";
-      case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches";
-      case kNotCompiledMalformedOpcode : return "kNotCompiledMalformedOpcode";
-      case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen";
-      case kNotCompiledPathological : return "kNotCompiledPathological";
-      case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter";
-      case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
-      case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
-      case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime";
-      case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
-      case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
-      case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
-      case kRemovedCheckedCast: return "kRemovedCheckedCast";
-      case kRemovedDeadInstruction: return "kRemovedDeadInstruction";
-      case kRemovedNullCheck: return "kRemovedNullCheck";
+      case kAttemptCompilation : name = "AttemptCompilation"; break;
+      case kCompiled : name = "Compiled"; break;
+      case kInlinedInvoke : name = "InlinedInvoke"; break;
+      case kInstructionSimplifications: name = "InstructionSimplifications"; break;
+      case kInstructionSimplificationsArch: name = "InstructionSimplificationsArch"; break;
+      case kUnresolvedMethod : name = "UnresolvedMethod"; break;
+      case kUnresolvedField : name = "UnresolvedField"; break;
+      case kUnresolvedFieldNotAFastAccess : name = "UnresolvedFieldNotAFastAccess"; break;
+      case kRemovedCheckedCast: name = "RemovedCheckedCast"; break;
+      case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break;
+      case kRemovedNullCheck: name = "RemovedNullCheck"; break;
+      case kNotCompiledBranchOutsideMethodCode: name = "NotCompiledBranchOutsideMethodCode"; break;
+      case kNotCompiledCannotBuildSSA : name = "NotCompiledCannotBuildSSA"; break;
+      case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break;
+      case kNotCompiledLargeMethodNoBranches : name = "NotCompiledLargeMethodNoBranches"; break;
+      case kNotCompiledMalformedOpcode : name = "NotCompiledMalformedOpcode"; break;
+      case kNotCompiledNoCodegen : name = "NotCompiledNoCodegen"; break;
+      case kNotCompiledPathological : name = "NotCompiledPathological"; break;
+      case kNotCompiledSpaceFilter : name = "NotCompiledSpaceFilter"; break;
+      case kNotCompiledUnhandledInstruction : name = "NotCompiledUnhandledInstruction"; break;
+      case kNotCompiledUnsupportedIsa : name = "NotCompiledUnsupportedIsa"; break;
+      case kNotCompiledVerificationError : name = "NotCompiledVerificationError"; break;
+      case kNotCompiledVerifyAtRuntime : name = "NotCompiledVerifyAtRuntime"; break;
+      case kInlinedMonomorphicCall: name = "InlinedMonomorphicCall"; break;
+      case kMonomorphicCall: name = "MonomorphicCall"; break;
+      case kPolymorphicCall: name = "PolymorphicCall"; break;
+      case kMegamorphicCall: name = "MegamorphicCall"; break;
 
-      case kLastStat: break;  // Invalid to print out.
+      case kLastStat:
+        LOG(FATAL) << "invalid stat "
+            << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat);
+        UNREACHABLE();
     }
-    LOG(FATAL) << "invalid stat "
-        << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat);
-    UNREACHABLE();
+    return "OptStat#" + name;
   }
 
   AtomicInteger compile_stats_[kLastStat];
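The reworked Log() emits a single fixed-precision summary line. A standalone
rendering of the same formatting, runnable outside ART:

    #include <iomanip>
    #include <iostream>

    int main() {
      int attempted = 1000, compiled = 875;  // example counts
      float percent = compiled * 100.0f / attempted;
      std::cout << "Attempted compilation of " << attempted << " methods: "
                << std::fixed << std::setprecision(2) << percent << "% ("
                << compiled << ") compiled." << std::endl;
      // Prints: Attempted compilation of 1000 methods: 87.50% (875) compiled.
    }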
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 30bcf19..176c50c 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -169,7 +169,7 @@
         // If `other_move` was swapped, we iterate again to find a new
         // potential cycle.
         required_swap = nullptr;
-        i = 0;
+        i = -1;
       } else if (required_swap != nullptr) {
         // A move is required to swap. We walk back the cycle to find the
        // move by just returning from this `PerformMove`.
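The i = -1 fix matters because the enclosing scan loop increments i before
re-testing; a minimal sketch of the pattern (the predicate is a stub, not the
real resolver logic):

    #include <cstddef>
    #include <vector>

    // Stub standing in for "performing this move swapped earlier pending moves".
    static bool ResolveOne(std::vector<int>& moves, size_t i) {
      if (moves[i] < 0) {
        moves[i] = -moves[i];
        return true;  // earlier moves may need re-scanning
      }
      return false;
    }

    static void Scan(std::vector<int>& moves) {
      for (size_t i = 0; i < moves.size(); ++i) {
        if (ResolveOne(moves, i)) {
          // Restart from the first move: the loop's ++i turns size_t(-1) into 0.
          // Assigning 0 here would skip moves[0] on the next iteration.
          i = static_cast<size_t>(-1);
        }
      }
    }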
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 46e6f3e..5e8fe37 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -609,4 +609,36 @@
   }
 }
 
+TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves2) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  {
+    TypeParam resolver(&allocator);
+    HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+    moves->AddMove(
+        Location::RegisterLocation(0),
+        Location::RegisterLocation(3),
+        Primitive::kPrimInt,
+        nullptr);
+    moves->AddMove(
+        Location::RegisterPairLocation(2, 3),
+        Location::RegisterPairLocation(0, 1),
+        Primitive::kPrimLong,
+        nullptr);
+    moves->AddMove(
+        Location::RegisterLocation(7),
+        Location::RegisterLocation(2),
+        Primitive::kPrimInt,
+        nullptr);
+    resolver.EmitNativeCode(moves);
+    if (TestFixture::has_swap) {
+      ASSERT_STREQ("(2,3 <-> 0,1) (2 -> 3) (7 -> 2)", resolver.GetMessage().c_str());
+    } else {
+      ASSERT_STREQ("(2,3 -> T0,T1) (0 -> 3) (T0,T1 -> 0,1) (7 -> 2)",
+          resolver.GetMessage().c_str());
+    }
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index c2894c7..a385448 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -15,6 +15,7 @@
  */
 
 #include "pc_relative_fixups_x86.h"
+#include "code_generator_x86.h"
 
 namespace art {
 namespace x86 {
@@ -26,6 +27,15 @@
  public:
   explicit PCRelativeHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {}
 
+  void MoveBaseIfNeeded() {
+    if (base_ != nullptr) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      base_->MoveBeforeFirstUserAndOutOfLoops();
+    }
+  }
+
  private:
   void VisitAdd(HAdd* add) OVERRIDE {
     BinaryFP(add);
@@ -70,9 +80,13 @@
   }
 
   void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
+    if (switch_insn->GetNumEntries() <=
+        InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) {
+      return;
+    }
     // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
     // address the constant area.
-    InitializePCRelativeBasePointer(switch_insn);
+    InitializePCRelativeBasePointer();
     HGraph* graph = GetGraph();
     HBasicBlock* block = switch_insn->GetBlock();
     HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch(
@@ -84,22 +98,22 @@
     block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch);
   }
 
-  void InitializePCRelativeBasePointer(HInstruction* user) {
+  void InitializePCRelativeBasePointer() {
     // Ensure we only initialize the pointer once.
     if (base_ != nullptr) {
       return;
     }
 
-    HGraph* graph = GetGraph();
-    HBasicBlock* entry = graph->GetEntryBlock();
-    base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress();
-    HInstruction* insert_pos = (user->GetBlock() == entry) ? user : entry->GetLastInstruction();
-    entry->InsertInstructionBefore(base_, insert_pos);
+    // Insert the base at the start of the entry block; we move it to a better
+    // position later in MoveBaseIfNeeded().
+    base_ = new (GetGraph()->GetArena()) HX86ComputeBaseMethodAddress();
+    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+    entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction());
     DCHECK(base_ != nullptr);
   }
 
   void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) {
-    InitializePCRelativeBasePointer(insn);
+    InitializePCRelativeBasePointer();
     HX86LoadFromConstantTable* load_constant =
         new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value, materialize);
     insn->GetBlock()->InsertInstructionBefore(load_constant, insn);
@@ -111,11 +125,10 @@
     // addressing, we need the PC-relative address base.
     HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
     if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) {
-      InitializePCRelativeBasePointer(invoke);
+      InitializePCRelativeBasePointer();
       // Add the extra parameter base_.
-      uint32_t index = invoke_static_or_direct->GetCurrentMethodInputIndex();
       DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
-      invoke_static_or_direct->InsertInputAt(index, base_);
+      invoke_static_or_direct->AddSpecialInput(base_);
     }
     // Ensure that we can load FP arguments from the constant area.
     for (size_t i = 0, e = invoke->InputCount(); i < e; i++) {
@@ -134,6 +147,7 @@
 void PcRelativeFixups::Run() {
   PCRelativeHandlerVisitor visitor(graph_);
   visitor.VisitInsertionOrder();
+  visitor.MoveBaseIfNeeded();
 }
 
 }  // namespace x86
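Hoisting pays off because the base is a real instruction: 32-bit x86 has no
PC-relative addressing, so code conventionally recovers the current address
with a call/pop pair, which is conceptually what HX86ComputeBaseMethodAddress
produces. A GCC/Clang inline-asm illustration (not ART's emitter):

    #include <cstdint>

    static uintptr_t CurrentAddress() {
      uintptr_t addr;
      asm volatile("call 1f\n"
                   "1: pop %0"
                   : "=r"(addr));  // addr now holds the address of label 1
      return addr;
    }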
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index f3d075c..d1770b7 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -48,22 +48,34 @@
 }
 
 void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) {
-  // Try to find a static invoke from which this check originated.
-  HInvokeStaticOrDirect* invoke = nullptr;
+  // Try to find a static invoke or a new-instance from which this check originated.
+  HInstruction* implicit_clinit = nullptr;
   for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); it.Advance()) {
     HInstruction* user = it.Current()->GetUser();
-    if (user->IsInvokeStaticOrDirect() && CanMoveClinitCheck(check, user)) {
-      invoke = user->AsInvokeStaticOrDirect();
-      DCHECK(invoke->IsStaticWithExplicitClinitCheck());
-      invoke->RemoveExplicitClinitCheck(HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
+    if ((user->IsInvokeStaticOrDirect() || user->IsNewInstance()) &&
+        CanMoveClinitCheck(check, user)) {
+      implicit_clinit = user;
+      if (user->IsInvokeStaticOrDirect()) {
+        DCHECK(user->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck());
+        user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck(
+            HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
+      } else {
+        DCHECK(user->IsNewInstance());
+        // We delegate the initialization duty to the allocation.
+        if (user->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectInitialized) {
+          user->AsNewInstance()->SetEntrypoint(kQuickAllocObjectResolved);
+        }
+      }
       break;
     }
   }
-  // If we found a static invoke for merging, remove the check from all other static invokes.
-  if (invoke != nullptr) {
+  // If we found a static invoke or new-instance for merging, remove the check
+  // from dominated static invokes.
+  if (implicit_clinit != nullptr) {
     for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); ) {
       HInstruction* user = it.Current()->GetUser();
-      DCHECK(invoke->StrictlyDominates(user));  // All other uses must be dominated.
+      // All other uses must be dominated.
+      DCHECK(implicit_clinit->StrictlyDominates(user) || (implicit_clinit == user));
       it.Advance();  // Advance before we remove the node, reference to the next node is preserved.
       if (user->IsInvokeStaticOrDirect()) {
         user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck(
@@ -77,8 +89,8 @@
 
   check->ReplaceWith(load_class);
 
-  if (invoke != nullptr) {
-    // Remove the check from the graph. It has been merged into the invoke.
+  if (implicit_clinit != nullptr) {
+    // Remove the check from the graph. It has been merged into the invoke or new-instance.
     check->GetBlock()->RemoveInstruction(check);
     // Check if we can merge the load class as well.
     if (can_merge_with_load_class && !load_class->HasUses()) {
@@ -92,6 +104,29 @@
   }
 }
 
+void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) {
+  HLoadClass* load_class = instruction->InputAt(0)->AsLoadClass();
+  bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse();
+  // Change the entrypoint to kQuickAllocObject if either:
+  // - the class is finalizable (only kQuickAllocObject handles finalizable classes),
+  // - the class needs access checks (we do not know if it's finalizable),
+  // - or the load class has only one use.
+  if (instruction->IsFinalizable() || has_only_one_use || load_class->NeedsAccessCheck()) {
+    instruction->SetEntrypoint(kQuickAllocObject);
+    instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex()), 0);
+    // The allocation entry point that deals with access checks does not work with inlined
+    // methods, so we need to check whether this allocation comes from an inlined method.
+    if (has_only_one_use && !instruction->GetEnvironment()->IsFromInlinedInvoke()) {
+      // We can remove the load class from the graph. If it needed access checks, we delegate
+      // the access check to the allocation.
+      if (load_class->NeedsAccessCheck()) {
+        instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck);
+      }
+      load_class->GetBlock()->RemoveInstruction(load_class);
+    }
+  }
+}
+
 void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) {
   bool needs_materialization = false;
   if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) {
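The entrypoint decision in VisitNewInstance above, condensed into a standalone
sketch (the enum mirrors the quick entrypoints named in the code; the helper
itself is hypothetical):

    enum Entrypoint {
      kAllocObjectInitialized,     // class statically known to be initialized
      kAllocObjectResolved,        // class resolved; may still need initialization
      kAllocObject,                // generic path; also handles finalizable classes
      kAllocObjectWithAccessCheck
    };

    static Entrypoint ChooseAllocEntrypoint(Entrypoint current,
                                            bool finalizable,
                                            bool needs_access_check,
                                            bool load_class_has_one_use,
                                            bool from_inlined_invoke) {
      if (!finalizable && !load_class_has_one_use && !needs_access_check) {
        return current;  // keep the HLoadClass input and current entrypoint
      }
      if (load_class_has_one_use && !from_inlined_invoke && needs_access_check) {
        return kAllocObjectWithAccessCheck;  // allocation performs the check
      }
      return kAllocObject;
    }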
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index a70fb30..9b24342 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -40,6 +40,7 @@
   void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
   void VisitCondition(HCondition* condition) OVERRIDE;
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
+  void VisitNewInstance(HNewInstance* instruction) OVERRIDE;
 
   bool CanMoveClinitCheck(HInstruction* input, HInstruction* user);
 
diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc
index c98f43e..bde54ee 100644
--- a/compiler/optimizing/primitive_type_propagation.cc
+++ b/compiler/optimizing/primitive_type_propagation.cc
@@ -63,7 +63,6 @@
             : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type);
         phi->ReplaceInput(equivalent, i);
         if (equivalent->IsPhi()) {
-          equivalent->AsPhi()->SetLive();
           AddToWorklist(equivalent->AsPhi());
         } else if (equivalent == input) {
           // The input has changed its type. It can be an input of other phis,
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 0d05c49..fea903d 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -127,6 +127,87 @@
   }
 }
 
+static void CheckHasNoTypedInputs(HInstruction* root_instr) {
+  ArenaAllocatorAdapter<void> adapter =
+      root_instr->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocReferenceTypePropagation);
+
+  ArenaVector<HPhi*> visited_phis(adapter);
+  ArenaVector<HInstruction*> worklist(adapter);
+  worklist.push_back(root_instr);
+
+  while (!worklist.empty()) {
+    HInstruction* instr = worklist.back();
+    worklist.pop_back();
+
+    if (instr->IsPhi() || instr->IsBoundType() || instr->IsNullCheck()) {
+      // Expect that both `root_instr` and its inputs have invalid RTI.
+      ScopedObjectAccess soa(Thread::Current());
+      DCHECK(!instr->GetReferenceTypeInfo().IsValid()) << "Instruction should not have valid RTI.";
+
+      // Insert all unvisited inputs to the worklist.
+      for (HInputIterator it(instr); !it.Done(); it.Advance()) {
+        HInstruction* input = it.Current();
+        if (input->IsPhi()) {
+          if (ContainsElement(visited_phis, input->AsPhi())) {
+            continue;
+          } else {
+            visited_phis.push_back(input->AsPhi());
+          }
+        }
+        worklist.push_back(input);
+      }
+    } else if (instr->IsNullConstant()) {
+      // Null constants may have valid RTI because their type is ignored.
+    } else {
+      LOG(FATAL) << "Unexpected input " << instr->DebugName() << instr->GetId() << " with RTI "
+          << instr->GetReferenceTypeInfo();
+      UNREACHABLE();
+    }
+  }
+}
+
+template<typename Functor>
+static void ForEachUntypedInstruction(HGraph* graph, Functor fn) {
+  ScopedObjectAccess soa(Thread::Current());
+  for (HReversePostOrderIterator block_it(*graph); !block_it.Done(); block_it.Advance()) {
+    for (HInstructionIterator it(block_it.Current()->GetPhis()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
+        fn(instr);
+      }
+    }
+    for (HInstructionIterator it(block_it.Current()->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
+        fn(instr);
+      }
+    }
+  }
+}
+
+void ReferenceTypePropagation::SetUntypedInstructionsToObject() {
+  // In some cases, the fix-point iteration will leave kPrimNot instructions with
+  // invalid RTI because bytecode does not provide enough typing information.
+  // Set the RTI of such instructions to Object.
+  // Example:
+  //   MyClass a = null, b = null;
+  //   while (a == null) {
+  //     if (cond) { a = b; } else { b = a; }
+  //   }
+
+  if (kIsDebugBuild) {
+    // Check that any instruction whose RTI we are about to set from invalid
+    // to Object has no typed instructions in its def-use chain, i.e. its
+    // type genuinely could not be inferred.
+    ForEachUntypedInstruction(graph_, [](HInstruction* instr) { CheckHasNoTypedInputs(instr); });
+  }
+
+  ReferenceTypeInfo obj_rti = ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false);
+  ForEachUntypedInstruction(graph_, [obj_rti](HInstruction* instr) {
+    instr->SetReferenceTypeInfo(obj_rti);
+  });
+}
+
 void ReferenceTypePropagation::Run() {
   // To properly propagate type info we need to visit in the dominator-based order.
   // Reverse post order guarantees a node's dominators are visited first.
@@ -136,6 +217,7 @@
   }
 
   ProcessWorklist();
+  SetUntypedInstructionsToObject();
   ValidateTypes();
 }
 
@@ -387,7 +469,7 @@
       // but then we would need to pass it to RTPVisitor just for this debug check. Since
       // the method is from the String class, the null loader is good enough.
       Handle<mirror::ClassLoader> loader;
-      ArtMethod* method = cl->ResolveMethod(
+      ArtMethod* method = cl->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
           invoke->GetDexFile(), invoke->GetDexMethodIndex(), dex_cache, loader, nullptr, kDirect);
       DCHECK(method != nullptr);
       mirror::Class* declaring_class = method->GetDeclaringClass();
@@ -534,8 +616,9 @@
 void RTPVisitor::VisitNullCheck(HNullCheck* instr) {
   ScopedObjectAccess soa(Thread::Current());
   ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
-  DCHECK(parent_rti.IsValid());
-  instr->SetReferenceTypeInfo(parent_rti);
+  if (parent_rti.IsValid()) {
+    instr->SetReferenceTypeInfo(parent_rti);
+  }
 }
 
 void RTPVisitor::VisitFakeString(HFakeString* instr) {
@@ -588,11 +671,16 @@
   }
 
   if (phi->GetBlock()->IsLoopHeader()) {
+    ScopedObjectAccess soa(Thread::Current());
     // Set the initial type for the phi. Use the non-back-edge input to reach
     // a fixed point faster.
+    HInstruction* first_input = phi->InputAt(0);
+    ReferenceTypeInfo first_input_rti = first_input->GetReferenceTypeInfo();
+    if (first_input_rti.IsValid() && !first_input->IsNullConstant()) {
+      phi->SetCanBeNull(first_input->CanBeNull());
+      phi->SetReferenceTypeInfo(first_input_rti);
+    }
     AddToWorklist(phi);
-    phi->SetCanBeNull(phi->InputAt(0)->CanBeNull());
-    phi->SetReferenceTypeInfo(phi->InputAt(0)->GetReferenceTypeInfo());
   } else {
     // Eagerly compute the type of the phi, for quicker convergence. Note
     // that we don't need to add users to the worklist because we are
@@ -653,7 +741,9 @@
   DCHECK_EQ(Primitive::kPrimNot, instr->GetType());
 
   ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
-  DCHECK(parent_rti.IsValid());
+  if (!parent_rti.IsValid()) {
+    return;
+  }
 
   Handle<mirror::Class> handle = parent_rti.GetTypeHandle();
   if (handle->IsObjectArrayClass()) {
@@ -665,8 +755,6 @@
     instr->SetReferenceTypeInfo(
         ReferenceTypeInfo::Create(object_class_handle, /* is_exact */ false));
   }
-
-  return;
 }
 
 bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) {
@@ -683,7 +771,7 @@
       instr->SetReferenceTypeInfo(parent_rti);
     }
   } else if (instr->IsArrayGet()) {
-    // TODO: consider if it's worth "looking back" and bounding the input object
+    // TODO: consider if it's worth "looking back" and binding the input object
     // to an array type.
     UpdateArrayGet(instr->AsArrayGet(), handles_, object_class_handle_);
   } else {
@@ -711,6 +799,7 @@
   if (instr->GetType() != Primitive::kPrimNot) {
     return;
   }
+
   ScopedObjectAccess soa(Thread::Current());
   UpdateArrayGet(instr, handles_, object_class_handle_);
   if (!instr->GetReferenceTypeInfo().IsValid()) {
@@ -770,7 +859,10 @@
       }
     }
   }
-  instr->SetReferenceTypeInfo(new_rti);
+
+  if (new_rti.IsValid()) {
+    instr->SetReferenceTypeInfo(new_rti);
+  }
 }
 
 // Re-computes and updates the nullability of the instruction. Returns whether or
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 5c05592..21789e1 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -57,6 +57,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void ValidateTypes();
+  void SetUntypedInstructionsToObject();
 
   StackHandleScopeCollection* handles_;
 
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 080f970..8706854 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -472,7 +472,7 @@
                                   HInstruction** input2) {
   HGraph* graph = CreateGraph(allocator);
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
   HInstruction* parameter = new (allocator) HParameterValue(
@@ -624,7 +624,7 @@
                                 HInstruction** field,
                                 HInstruction** ret) {
   HGraph* graph = CreateGraph(allocator);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index a128079..5e1d1d9 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -49,7 +49,8 @@
   }
 
   // TODO: Avoid CompilerDriver.
-  InvokeType invoke_type = invoke->GetOriginalInvokeType();
+  InvokeType original_invoke_type = invoke->GetOriginalInvokeType();
+  InvokeType optimized_invoke_type = original_invoke_type;
   MethodReference target_method(&graph_->GetDexFile(), invoke->GetDexMethodIndex());
   int vtable_idx;
   uintptr_t direct_code, direct_method;
@@ -58,15 +59,18 @@
       invoke->GetDexPc(),
       false /* update_stats: already updated in builder */,
       true /* enable_devirtualization */,
-      &invoke_type,
+      &optimized_invoke_type,
       &target_method,
       &vtable_idx,
       &direct_code,
       &direct_method);
-  DCHECK(success);
-  DCHECK_EQ(invoke_type, invoke->GetInvokeType());
-  DCHECK_EQ(target_method.dex_file, invoke->GetTargetMethod().dex_file);
-  DCHECK_EQ(target_method.dex_method_index, invoke->GetTargetMethod().dex_method_index);
+  if (!success) {
+    // TODO: try using kDexCachePcRelative. It's always a valid method load
+    // kind as long as it's supported by the codegen.
+    return;
+  }
+  invoke->SetOptimizedInvokeType(optimized_invoke_type);
+  invoke->SetTargetMethod(target_method);
 
   HInvokeStaticOrDirect::MethodLoadKind method_load_kind;
   HInvokeStaticOrDirect::CodePtrLocation code_ptr_location;
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 5190eb3..9e6cfbe 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -22,6 +22,13 @@
 
 namespace art {
 
+// Returns whether this is a loop header phi which was eagerly created but later
+// found inconsistent due to the vreg being undefined in one of its predecessors.
+// Such a phi is marked dead and should be ignored until its removal in SsaPhiElimination.
+static bool IsUndefinedLoopHeaderPhi(HPhi* phi) {
+  return phi->IsLoopHeaderPhi() && phi->InputCount() != phi->GetBlock()->GetPredecessors().size();
+}
+
 /**
  * A debuggable application may require reviving phis to ensure their
  * associated DEX register is available to a debugger. This class implements
@@ -165,17 +172,15 @@
 void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) {
   for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
     HPhi* phi = it.Current()->AsPhi();
+    if (IsUndefinedLoopHeaderPhi(phi)) {
+      DCHECK(phi->IsDead());
+      continue;
+    }
     if (phi->IsDead() && phi->HasEnvironmentUses()) {
       phi->SetLive();
       if (block->IsLoopHeader()) {
-        // Give a type to the loop phi to guarantee convergence of the algorithm.
-        // Note that the dead phi may already have a type if it is an equivalent
-        // generated for a typed LoadLocal. In that case we do not change the
-        // type because it could lead to an unsupported PrimNot/Float/Double ->
-        // PrimInt/Long transition and create same type equivalents.
-        if (phi->GetType() == Primitive::kPrimVoid) {
-          phi->SetType(phi->InputAt(0)->GetType());
-        }
+        // Loop phis must have a type to guarantee convergence of the algorithm.
+        DCHECK_NE(phi->GetType(), Primitive::kPrimVoid);
         AddToWorklist(phi);
       } else {
         // Because we are doing a reverse post order visit, all inputs of
@@ -220,6 +225,27 @@
   ProcessWorklist();
 }
 
+void SsaBuilder::SetLoopHeaderPhiInputs() {
+  for (size_t i = loop_headers_.size(); i > 0; --i) {
+    HBasicBlock* block = loop_headers_[i - 1];
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      HPhi* phi = it.Current()->AsPhi();
+      size_t vreg = phi->GetRegNumber();
+      for (HBasicBlock* predecessor : block->GetPredecessors()) {
+        HInstruction* value = ValueOfLocal(predecessor, vreg);
+        if (value == nullptr) {
+          // The vreg is undefined at this predecessor. Mark the phi dead and leave it
+          // with fewer inputs than predecessors; SsaChecker will fail unless it is removed.
+          phi->SetDead();
+          break;
+        } else {
+          phi->AddInput(value);
+        }
+      }
+    }
+  }
+}
+
 void SsaBuilder::FixNullConstantType() {
   // The order doesn't matter here.
   for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) {
@@ -283,15 +309,7 @@
   }
 
   // 2) Set inputs of loop phis.
-  for (HBasicBlock* block : loop_headers_) {
-    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-      HPhi* phi = it.Current()->AsPhi();
-      for (HBasicBlock* predecessor : block->GetPredecessors()) {
-        HInstruction* input = ValueOfLocal(predecessor, phi->GetRegNumber());
-        phi->AddInput(input);
-      }
-    }
-  }
+  SetLoopHeaderPhiInputs();
 
   // 3) Mark dead phis. This will mark phis that are only used by environments:
   // at the DEX level, the type of these phis does not need to be consistent, but
@@ -403,8 +421,13 @@
       for (size_t i = 0; i < vregs; ++i) {
         // No point in creating the catch phi if it is already undefined at
         // the first throwing instruction.
-        if ((*current_locals_)[i] != nullptr) {
-          HPhi* phi = new (arena) HPhi(arena, i, 0, Primitive::kPrimVoid);
+        HInstruction* current_local_value = (*current_locals_)[i];
+        if (current_local_value != nullptr) {
+          HPhi* phi = new (arena) HPhi(
+              arena,
+              i,
+              0,
+              current_local_value->GetType());
           block->AddPhi(phi);
           (*locals)[i] = phi;
         }
@@ -451,7 +474,10 @@
       HInstruction* incoming = ValueOfLocal(block->GetLoopInformation()->GetPreHeader(), local);
       if (incoming != nullptr) {
         HPhi* phi = new (GetGraph()->GetArena()) HPhi(
-            GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid);
+            GetGraph()->GetArena(),
+            local,
+            0,
+            incoming->GetType());
         block->AddPhi(phi);
         (*current_locals_)[local] = phi;
       }
@@ -484,8 +510,12 @@
       }
 
       if (is_different) {
+        HInstruction* first_input = ValueOfLocal(block->GetPredecessors()[0], local);
         HPhi* phi = new (GetGraph()->GetArena()) HPhi(
-            GetGraph()->GetArena(), local, block->GetPredecessors().size(), Primitive::kPrimVoid);
+            GetGraph()->GetArena(),
+            local,
+            block->GetPredecessors().size(),
+            first_input->GetType());
         for (size_t i = 0; i < block->GetPredecessors().size(); i++) {
           HInstruction* pred_value = ValueOfLocal(block->GetPredecessors()[i], local);
           phi->SetRawInputAt(i, pred_value);
@@ -583,8 +613,16 @@
     phi->GetBlock()->InsertPhiAfter(new_phi, phi);
     return new_phi;
   } else {
-    DCHECK_EQ(next->GetType(), type);
-    return next->AsPhi();
+    HPhi* next_phi = next->AsPhi();
+    DCHECK_EQ(next_phi->GetType(), type);
+    if (next_phi->IsDead()) {
+      // TODO(dbrazdil): Remove this SetLive (we should not need to revive phis)
+      // once we stop running MarkDeadPhis before PrimitiveTypePropagation. This
+      // cannot revive undefined loop header phis because they cannot have uses.
+      DCHECK(!IsUndefinedLoopHeaderPhi(next_phi));
+      next_phi->SetLive();
+    }
+    return next_phi;
   }
 }
 
@@ -638,7 +676,36 @@
 }
 
 void SsaBuilder::VisitStoreLocal(HStoreLocal* store) {
-  (*current_locals_)[store->GetLocal()->GetRegNumber()] = store->InputAt(1);
+  uint32_t reg_number = store->GetLocal()->GetRegNumber();
+  HInstruction* stored_value = store->InputAt(1);
+  Primitive::Type stored_type = stored_value->GetType();
+  DCHECK_NE(stored_type, Primitive::kPrimVoid);
+
+  // Storing into vreg `reg_number` may implicitly invalidate the surrounding
+  // registers. Consider the following cases:
+  // (1) Storing a wide value must overwrite previous values in both `reg_number`
+  //     and `reg_number+1`. We store `nullptr` in `reg_number+1`.
+  // (2) If vreg `reg_number-1` holds a wide value, writing into `reg_number`
+  //     must invalidate it. We store `nullptr` in `reg_number-1`.
+  // Consequently, storing a wide value into the high vreg of another wide value
+  // will invalidate both `reg_number-1` and `reg_number+1`.
+
+  if (reg_number != 0) {
+    HInstruction* local_low = (*current_locals_)[reg_number - 1];
+    if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) {
+      // The vreg we are storing into was previously the high vreg of a pair.
+      // We need to invalidate its low vreg.
+      DCHECK((*current_locals_)[reg_number] == nullptr);
+      (*current_locals_)[reg_number - 1] = nullptr;
+    }
+  }
+
+  (*current_locals_)[reg_number] = stored_value;
+  if (Primitive::Is64BitType(stored_type)) {
+    // We are storing a pair. Invalidate the instruction in the high vreg.
+    (*current_locals_)[reg_number + 1] = nullptr;
+  }
+
   store->GetBlock()->RemoveInstruction(store);
 }
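The invalidation rules from the comment above, restated over a plain vector
(Value stands in for HInstruction*, is_wide for Primitive::Is64BitType):

    #include <cstddef>
    #include <vector>

    struct Value { bool is_wide; };

    static void StoreIntoVreg(std::vector<Value*>& locals, size_t reg, Value* v) {
      if (reg != 0) {
        Value* below = locals[reg - 1];
        if (below != nullptr && below->is_wide) {
          locals[reg - 1] = nullptr;  // clobbered the high half of a pair
        }
      }
      locals[reg] = v;
      if (v->is_wide) {
        locals[reg + 1] = nullptr;  // a wide value occupies reg and reg + 1
      }
    }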
 
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 79f1a28..dcce5e4 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -81,6 +81,7 @@
   static constexpr const char* kSsaBuilderPassName = "ssa_builder";
 
  private:
+  void SetLoopHeaderPhiInputs();
   void FixNullConstantType();
   void EquivalentPhisCleanup();
 
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 72f9ddd..a3219dc 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -16,6 +16,8 @@
 
 #include "ssa_phi_elimination.h"
 
+#include "base/arena_containers.h"
+
 namespace art {
 
 void SsaDeadPhiElimination::Run() {
@@ -24,22 +26,36 @@
 }
 
 void SsaDeadPhiElimination::MarkDeadPhis() {
+  // Phis are constructed live and should not be revived if previously marked
+  // dead. This algorithm temporarily breaks that invariant but we DCHECK that
+  // only phis which were initially live are revived.
+  ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter());
+
   // Add to the worklist phis referenced by non-phi instructions.
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
       HPhi* phi = inst_it.Current()->AsPhi();
-      // Set dead ahead of running through uses. The phi may have no use.
-      phi->SetDead();
+      if (phi->IsDead()) {
+        continue;
+      }
+
+      bool has_non_phi_use = false;
       for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
-        HUseListNode<HInstruction*>* current = use_it.Current();
-        HInstruction* user = current->GetUser();
-        if (!user->IsPhi()) {
-          worklist_.push_back(phi);
-          phi->SetLive();
+        if (!use_it.Current()->GetUser()->IsPhi()) {
+          has_non_phi_use = true;
           break;
         }
       }
+
+      if (has_non_phi_use) {
+        worklist_.push_back(phi);
+      } else {
+        phi->SetDead();
+        if (kIsDebugBuild) {
+          initially_live.insert(phi);
+        }
+      }
     }
   }
 
@@ -48,10 +64,13 @@
     HPhi* phi = worklist_.back();
     worklist_.pop_back();
     for (HInputIterator it(phi); !it.Done(); it.Advance()) {
-      HInstruction* input = it.Current();
-      if (input->IsPhi() && input->AsPhi()->IsDead()) {
-        worklist_.push_back(input->AsPhi());
-        input->AsPhi()->SetLive();
+      HPhi* input = it.Current()->AsPhi();
+      if (input != nullptr && input->IsDead()) {
+        // Input is a dead phi. Revive it and add to the worklist. We make sure
+        // that the phi was not dead initially (see definition of `initially_live`).
+        DCHECK(ContainsElement(initially_live, input));
+        input->SetLive();
+        worklist_.push_back(input);
       }
     }
   }
@@ -118,7 +137,6 @@
     }
 
     if (phi->InputCount() == 0) {
-      DCHECK(phi->IsCatchPhi());
       DCHECK(phi->IsDead());
       continue;
     }
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index 39e5259..48465e6 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -57,7 +57,7 @@
       __ LoadFromOffset(kLoadWord, PC, R0, offset.Int32Value());
       break;
     case kJniAbi:  // Load via Thread* held in JNIEnv* in first argument (R0).
-      __ LoadFromOffset(kLoadWord, IP, R0, JNIEnvExt::SelfOffset().Int32Value());
+      __ LoadFromOffset(kLoadWord, IP, R0, JNIEnvExt::SelfOffset(4).Int32Value());
       __ LoadFromOffset(kLoadWord, PC, IP, offset.Int32Value());
       break;
     case kQuickAbi:  // R9 holds Thread*.
@@ -91,7 +91,7 @@
     case kJniAbi:  // Load via Thread* held in JNIEnv* in first argument (X0).
       __ LoadRawPtr(Arm64ManagedRegister::FromXRegister(IP1),
                       Arm64ManagedRegister::FromXRegister(X0),
-                      Offset(JNIEnvExt::SelfOffset().Int32Value()));
+                      Offset(JNIEnvExt::SelfOffset(8).Int32Value()));
 
       __ JumpTo(Arm64ManagedRegister::FromXRegister(IP1), Offset(offset.Int32Value()),
                 Arm64ManagedRegister::FromXRegister(IP0));
@@ -126,7 +126,7 @@
       __ LoadFromOffset(kLoadWord, T9, A0, offset.Int32Value());
       break;
     case kJniAbi:  // Load via Thread* held in JNIEnv* in first argument (A0).
-      __ LoadFromOffset(kLoadWord, T9, A0, JNIEnvExt::SelfOffset().Int32Value());
+      __ LoadFromOffset(kLoadWord, T9, A0, JNIEnvExt::SelfOffset(4).Int32Value());
       __ LoadFromOffset(kLoadWord, T9, T9, offset.Int32Value());
       break;
     case kQuickAbi:  // S1 holds Thread*.
@@ -158,7 +158,7 @@
       __ LoadFromOffset(kLoadDoubleword, T9, A0, offset.Int32Value());
       break;
     case kJniAbi:  // Load via Thread* held in JNIEnv* in first argument (A0).
-      __ LoadFromOffset(kLoadDoubleword, T9, A0, JNIEnvExt::SelfOffset().Int32Value());
+      __ LoadFromOffset(kLoadDoubleword, T9, A0, JNIEnvExt::SelfOffset(8).Int32Value());
       __ LoadFromOffset(kLoadDoubleword, T9, T9, offset.Int32Value());
       break;
     case kQuickAbi:  // Fall-through.
diff --git a/compiler/trampolines/trampoline_compiler.h b/compiler/trampolines/trampoline_compiler.h
index 9fb2245..66d5ac3 100644
--- a/compiler/trampolines/trampoline_compiler.h
+++ b/compiler/trampolines/trampoline_compiler.h
@@ -25,12 +25,12 @@
 namespace art {
 
 // Create code that will invoke the function held in thread local storage.
-const std::vector<uint8_t>* CreateTrampoline32(InstructionSet isa, EntryPointCallingConvention abi,
-                                               ThreadOffset<4> entry_point_offset)
-    SHARED_REQUIRES(Locks::mutator_lock_);
-const std::vector<uint8_t>* CreateTrampoline64(InstructionSet isa, EntryPointCallingConvention abi,
-                                               ThreadOffset<8> entry_point_offset)
-    SHARED_REQUIRES(Locks::mutator_lock_);
+const std::vector<uint8_t>* CreateTrampoline32(InstructionSet isa,
+                                               EntryPointCallingConvention abi,
+                                               ThreadOffset<4> entry_point_offset);
+const std::vector<uint8_t>* CreateTrampoline64(InstructionSet isa,
+                                               EntryPointCallingConvention abi,
+                                               ThreadOffset<8> entry_point_offset);
 
 }  // namespace art
 
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 68e3956..dead8fd 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -342,9 +342,9 @@
       return IsAbsoluteUint<12>(offset);
     case kLoadSWord:
     case kLoadDWord:
-      return IsAbsoluteUint<10>(offset);  // VFP addressing mode.
+      return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;  // VFP addressing mode.
     case kLoadWordPair:
-      return IsAbsoluteUint<10>(offset);
+      return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;
     default:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -360,9 +360,9 @@
       return IsAbsoluteUint<12>(offset);
     case kStoreSWord:
     case kStoreDWord:
-      return IsAbsoluteUint<10>(offset);  // VFP addressing mode.
+      return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;  // VFP addressing mode.
     case kStoreWordPair:
-      return IsAbsoluteUint<10>(offset);
+      return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;
     default:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
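The added (offset & 3) == 0 term reflects the VFP and word-pair encodings,
which store an 8-bit immediate scaled by 4; a standalone restatement (the
helper name is ours, not ART's):

    #include <cstdint>

    static bool IsValidVfpLoadStoreOffset(int32_t offset) {
      int64_t magnitude = offset < 0 ? -static_cast<int64_t>(offset) : offset;
      // At most 10 bits of magnitude (an 8-bit immediate scaled by 4), word-aligned.
      return magnitude < (1 << 10) && (offset & 3) == 0;
    }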
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 4a6e6d7..b79c2f0 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -22,6 +22,7 @@
 
 #include "base/bit_utils.h"
 #include "base/logging.h"
+#include "base/stl_util.h"
 #include "base/value_object.h"
 #include "constants_arm.h"
 #include "utils/arm/managed_register_arm.h"
@@ -697,10 +698,9 @@
   // Most of these are pure virtual as they need to be implemented per instruction set.
 
   // Create a new literal with a given value.
-  // NOTE: Force the template parameter to be explicitly specified. In the absence of
-  // std::omit_from_type_deduction<T> or std::identity<T>, use std::decay<T>.
+  // NOTE: Force the template parameter to be explicitly specified.
   template <typename T>
-  Literal* NewLiteral(typename std::decay<T>::type value) {
+  Literal* NewLiteral(typename Identity<T>::type value) {
     static_assert(std::is_integral<T>::value, "T must be an integral type.");
     return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
   }
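Why Identity<T> rather than std::decay<T>: a dependent Identity<T>::type
parameter is a non-deduced context, so callers must name T explicitly;
std::decay<T> had the same effect but implied an unrelated transformation.
A self-contained sketch of the trick:

    #include <type_traits>

    template <typename T>
    struct Identity { using type = T; };  // as provided by base/stl_util.h

    template <typename T>
    void NewLiteralLike(typename Identity<T>::type value) {
      static_assert(std::is_integral<T>::value, "T must be an integral type.");
      (void)value;
    }

    int main() {
      // NewLiteralLike(42);        // would not compile: T cannot be deduced
      NewLiteralLike<int64_t>(42);  // OK: 42 converts to the named type
    }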
@@ -878,7 +878,15 @@
                                      Register rn,
                                      Opcode opcode,
                                      uint32_t immediate,
+                                     SetCc set_cc,
                                      ShifterOperand* shifter_op) = 0;
+  bool ShifterOperandCanHold(Register rd,
+                             Register rn,
+                             Opcode opcode,
+                             uint32_t immediate,
+                             ShifterOperand* shifter_op) {
+    return ShifterOperandCanHold(rd, rn, opcode, immediate, kCcDontCare, shifter_op);
+  }
 
   virtual bool ShifterOperandCanAlwaysHold(uint32_t immediate) = 0;
 
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index a7dbacd..ebca25b 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -57,6 +57,7 @@
                                            Register rn ATTRIBUTE_UNUSED,
                                            Opcode opcode ATTRIBUTE_UNUSED,
                                            uint32_t immediate,
+                                           SetCc set_cc ATTRIBUTE_UNUSED,
                                            ShifterOperand* shifter_op) {
   return ShifterOperandCanHoldArm32(immediate, shifter_op);
 }
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 5233dcb..bf332fe 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -297,7 +297,9 @@
                              Register rn,
                              Opcode opcode,
                              uint32_t immediate,
+                             SetCc set_cc,
                              ShifterOperand* shifter_op) OVERRIDE;
+  using ArmAssembler::ShifterOperandCanHold;  // Don't hide the non-virtual override.
 
   bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;
 
@@ -389,8 +391,6 @@
   void EmitBranch(Condition cond, Label* label, bool link);
   static int32_t EncodeBranchOffset(int offset, int32_t inst);
   static int DecodeBranchOffset(int32_t inst);
-  int32_t EncodeTstOffset(int offset, int32_t inst);
-  int DecodeTstOffset(int32_t inst);
   bool ShifterOperandCanHoldArm32(uint32_t immediate, ShifterOperand* shifter_op);
 };
 
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 297cc54..f341030 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -500,6 +500,7 @@
                                             Register rn ATTRIBUTE_UNUSED,
                                             Opcode opcode,
                                             uint32_t immediate,
+                                            SetCc set_cc,
                                             ShifterOperand* shifter_op) {
   shifter_op->type_ = ShifterOperand::kImmediate;
   shifter_op->immed_ = immediate;
@@ -508,7 +509,8 @@
   switch (opcode) {
     case ADD:
     case SUB:
-      if (immediate < (1 << 12)) {    // Less than (or equal to) 12 bits can always be done.
+      // Less than (or equal to) 12 bits can be done if we don't need to set condition codes.
+      if (immediate < (1 << 12) && set_cc != kCcSet) {
         return true;
       }
       return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
@@ -1239,7 +1241,10 @@
       // The only thumb1 instructions with a register and an immediate are ADD and SUB
       // with a 3-bit immediate, and RSB with zero immediate.
       if (opcode == ADD || opcode == SUB) {
-        if (!IsUint<3>(so.GetImmediate())) {
+        if ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet) {
+          return true;  // Cannot match "setflags".
+        }
+        if (!IsUint<3>(so.GetImmediate()) && !IsUint<3>(-so.GetImmediate())) {
           return true;
         }
       } else {
@@ -1249,8 +1254,12 @@
       // ADD, SUB, CMP and MOV may be thumb1 only if the immediate is 8 bits.
       if (!(opcode == ADD || opcode == SUB || opcode == MOV || opcode == CMP)) {
         return true;
+      } else if (opcode != CMP && ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet)) {
+        return true;  // Cannot match "setflags" for ADD, SUB or MOV.
       } else {
-        if (!IsUint<8>(so.GetImmediate())) {
+        // For ADD and SUB, also allow a negative 8-bit immediate; we emit the opposite opcode.
+        if (!IsUint<8>(so.GetImmediate()) &&
+            (opcode == MOV || opcode == CMP || !IsUint<8>(-so.GetImmediate()))) {
           return true;
         }
       }
@@ -1349,7 +1358,8 @@
   int32_t encoding = 0;
   if (so.IsImmediate()) {
     // Check special cases.
-    if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12))) {
+    if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12)) &&
+        /* Prefer T3 encoding to T4. */ !ShifterOperandCanAlwaysHold(so.GetImmediate())) {
       if (set_cc != kCcSet) {
         if (opcode == SUB) {
           thumb_opcode = 5U;
@@ -1601,12 +1611,18 @@
   uint8_t rn_shift = 3;
   uint8_t immediate_shift = 0;
   bool use_immediate = false;
-  uint32_t immediate = 0;  // Should be at most 9 bits but keep the full immediate for CHECKs.
+  uint32_t immediate = 0;  // Should be at most 10 bits but keep the full immediate for CHECKs.
   uint8_t thumb_opcode;
 
   if (so.IsImmediate()) {
     use_immediate = true;
     immediate = so.GetImmediate();
+    if (!IsUint<10>(immediate)) {
+      // Flip ADD/SUB.
+      opcode = (opcode == ADD) ? SUB : ADD;
+      immediate = -immediate;
+      DCHECK(IsUint<10>(immediate));  // More stringent checks below.
+    }
   }
 
   switch (opcode) {
@@ -1643,7 +1659,7 @@
           dp_opcode = 2U /* 0b10 */;
           thumb_opcode = 3U /* 0b11 */;
           opcode_shift = 12;
-          CHECK_LT(immediate, (1u << 9));
+          CHECK(IsUint<9>(immediate));
           CHECK_ALIGNED(immediate, 4);
 
           // Remove rd and rn from instruction by orring it with immed and clearing bits.
@@ -1657,7 +1673,7 @@
           dp_opcode = 2U /* 0b10 */;
           thumb_opcode = 5U /* 0b101 */;
           opcode_shift = 11;
-          CHECK_LT(immediate, (1u << 10));
+          CHECK(IsUint<10>(immediate));
           CHECK_ALIGNED(immediate, 4);
 
           // Remove rn from instruction.
@@ -1667,11 +1683,13 @@
           immediate >>= 2;
         } else if (rn != rd) {
           // Must use T1.
+          CHECK(IsUint<3>(immediate));
           opcode_shift = 9;
           thumb_opcode = 14U /* 0b01110 */;
           immediate_shift = 6;
         } else {
           // T2 encoding.
+          CHECK(IsUint<8>(immediate));
           opcode_shift = 11;
           thumb_opcode = 6U /* 0b110 */;
           rd_shift = 8;
@@ -1701,7 +1719,7 @@
           dp_opcode = 2U /* 0b10 */;
           thumb_opcode = 0x61 /* 0b1100001 */;
           opcode_shift = 7;
-          CHECK_LT(immediate, (1u << 9));
+          CHECK(IsUint<9>(immediate));
           CHECK_ALIGNED(immediate, 4);
 
           // Remove rd and rn from instruction by orring it with immed and clearing bits.
@@ -1712,11 +1730,13 @@
           immediate >>= 2;
         } else if (rn != rd) {
           // Must use T1.
+          CHECK(IsUint<3>(immediate));
           opcode_shift = 9;
           thumb_opcode = 15U /* 0b01111 */;
           immediate_shift = 6;
         } else {
           // T2 encoding.
+          CHECK(IsUint<8>(immediate));
           opcode_shift = 11;
           thumb_opcode = 7U /* 0b111 */;
           rd_shift = 8;
@@ -2568,30 +2588,19 @@
 
 void Thumb2Assembler::movw(Register rd, uint16_t imm16, Condition cond) {
   CheckCondition(cond);
-  bool must_be_32bit = force_32bit_;
-  if (IsHighRegister(rd)|| imm16 >= 256u) {
-    must_be_32bit = true;
-  }
-
-  if (must_be_32bit) {
-    // Use encoding T3.
-    uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */;
-    uint32_t i = (imm16 >> 11) & 1U /* 0b1 */;
-    uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */;
-    uint32_t imm8 = imm16 & 0xff;
-    int32_t encoding = B31 | B30 | B29 | B28 |
-                    B25 | B22 |
-                    static_cast<uint32_t>(rd) << 8 |
-                    i << 26 |
-                    imm4 << 16 |
-                    imm3 << 12 |
-                    imm8;
-    Emit32(encoding);
-  } else {
-    int16_t encoding = B13 | static_cast<uint16_t>(rd) << 8 |
-                imm16;
-    Emit16(encoding);
-  }
+  // Always 32 bits, encoding T3. (Other encodings are called MOV, not MOVW.)
+  uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */;
+  uint32_t i = (imm16 >> 11) & 1U /* 0b1 */;
+  uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */;
+  uint32_t imm8 = imm16 & 0xff;
+  int32_t encoding = B31 | B30 | B29 | B28 |
+                  B25 | B22 |
+                  static_cast<uint32_t>(rd) << 8 |
+                  i << 26 |
+                  imm4 << 16 |
+                  imm3 << 12 |
+                  imm8;
+  Emit32(encoding);
 }
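
As a cross-check of the T3 bit layout assembled above, here is a minimal standalone sketch that rebuilds the encoding for movw r9, #0xffff and prints the two halfwords shown for that instruction in the expected listings below (f64f 79ff):

    #include <cstdint>
    #include <cstdio>

    static uint32_t EncodeMovwT3(uint32_t rd, uint16_t imm16) {
      uint32_t imm4 = (imm16 >> 12) & 0xfu;
      uint32_t i    = (imm16 >> 11) & 0x1u;
      uint32_t imm3 = (imm16 >> 8) & 0x7u;
      uint32_t imm8 = imm16 & 0xffu;
      return (0xfu << 28)  // B31 | B30 | B29 | B28.
             | (1u << 25)  // B25.
             | (1u << 22)  // B22.
             | (i << 26) | (imm4 << 16) | (imm3 << 12) | (rd << 8) | imm8;
    }

    int main() {
      uint32_t encoding = EncodeMovwT3(9u, 0xffffu);  // movw r9, #65535.
      printf("%04x %04x\n", static_cast<unsigned>(encoding >> 16),
             static_cast<unsigned>(encoding & 0xffffu));  // f64f 79ff.
      return 0;
    }
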
 
 
@@ -3220,7 +3229,7 @@
 
 void Thumb2Assembler::Rrx(Register rd, Register rm, Condition cond, SetCc set_cc) {
   CheckCondition(cond);
-  EmitShift(rd, rm, RRX, rm, cond, set_cc);
+  EmitShift(rd, rm, RRX, 0, cond, set_cc);
 }
 
 
@@ -3411,25 +3420,30 @@
   // positive values and sub for negative ones, which would slightly improve
   // the readability of generated code for some constants.
   ShifterOperand shifter_op;
-  if (ShifterOperandCanHold(rd, rn, ADD, value, &shifter_op)) {
+  if (ShifterOperandCanHold(rd, rn, ADD, value, set_cc, &shifter_op)) {
     add(rd, rn, shifter_op, cond, set_cc);
-  } else if (ShifterOperandCanHold(rd, rn, SUB, -value, &shifter_op)) {
+  } else if (ShifterOperandCanHold(rd, rn, SUB, -value, set_cc, &shifter_op)) {
     sub(rd, rn, shifter_op, cond, set_cc);
   } else {
     CHECK(rn != IP);
-    if (ShifterOperandCanHold(rd, rn, MVN, ~value, &shifter_op)) {
-      mvn(IP, shifter_op, cond, kCcKeep);
-      add(rd, rn, ShifterOperand(IP), cond, set_cc);
-    } else if (ShifterOperandCanHold(rd, rn, MVN, ~(-value), &shifter_op)) {
-      mvn(IP, shifter_op, cond, kCcKeep);
-      sub(rd, rn, ShifterOperand(IP), cond, set_cc);
+    // If rd != rn, use rd as the temp. This allows 16-bit ADD/SUB in more situations than using IP.
+    Register temp = (rd != rn) ? rd : IP;
+    if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~value, set_cc, &shifter_op)) {
+      mvn(temp, shifter_op, cond, kCcKeep);
+      add(rd, rn, ShifterOperand(temp), cond, set_cc);
+    } else if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~(-value), set_cc, &shifter_op)) {
+      mvn(temp, shifter_op, cond, kCcKeep);
+      sub(rd, rn, ShifterOperand(temp), cond, set_cc);
+    } else if (High16Bits(-value) == 0) {
+      movw(temp, Low16Bits(-value), cond);
+      sub(rd, rn, ShifterOperand(temp), cond, set_cc);
     } else {
-      movw(IP, Low16Bits(value), cond);
+      movw(temp, Low16Bits(value), cond);
       uint16_t value_high = High16Bits(value);
       if (value_high != 0) {
-        movt(IP, value_high, cond);
+        movt(temp, value_high, cond);
       }
-      add(rd, rn, ShifterOperand(IP), cond, set_cc);
+      add(rd, rn, ShifterOperand(temp), cond, set_cc);
     }
   }
 }
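
One fallback above deserves a worked example: for value = 0x1001, neither the value nor its negation is an encodable immediate, but ~(-0x1001) == 0x1000 is, so the assembler emits mvn temp, #0x1000 followed by sub rd, rn, temp (the "MVN+SUB" rows in the AddConstant test below). A minimal sketch of the arithmetic:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t rn = 0x12345678u;
      uint32_t temp = ~0x1000u;  // mvn temp, #0x1000 -> 0xffffefff == -0x1001.
      uint32_t rd = rn - temp;   // sub rd, rn, temp  -> rn + 0x1001.
      printf("%08x\n", rd);      // 12346679, i.e. 0x12345678 + 0x1001.
      return 0;
    }
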
@@ -3439,9 +3453,9 @@
   // positive values and sub for negative ones, which would slightly improve
   // the readability of generated code for some constants.
   ShifterOperand shifter_op;
-  if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, &shifter_op)) {
+  if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, kCcSet, &shifter_op)) {
     cmp(rn, shifter_op, cond);
-  } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, &shifter_op)) {
+  } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, kCcSet, &shifter_op)) {
     cmn(rn, shifter_op, cond);
   } else {
     CHECK(rn != IP);
@@ -3469,6 +3483,73 @@
   }
 }
 
+int32_t Thumb2Assembler::GetAllowedLoadOffsetBits(LoadOperandType type) {
+  switch (type) {
+    case kLoadSignedByte:
+    case kLoadSignedHalfword:
+    case kLoadUnsignedHalfword:
+    case kLoadUnsignedByte:
+    case kLoadWord:
+      // We can encode imm12 offset.
+      return 0xfffu;
+    case kLoadSWord:
+    case kLoadDWord:
+    case kLoadWordPair:
+      // We can encode imm8:'00' offset.
+      return 0xff << 2;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
+int32_t Thumb2Assembler::GetAllowedStoreOffsetBits(StoreOperandType type) {
+  switch (type) {
+    case kStoreHalfword:
+    case kStoreByte:
+    case kStoreWord:
+      // We can encode imm12 offset.
+      return 0xfff;
+    case kStoreSWord:
+    case kStoreDWord:
+    case kStoreWordPair:
+      // We can encode imm8:'00' offset.
+      return 0xff << 2;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
+bool Thumb2Assembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
+                                              int32_t offset,
+                                              /*out*/ int32_t* add_to_base,
+                                              /*out*/ int32_t* offset_for_load_store) {
+  int32_t other_bits = offset & ~allowed_offset_bits;
+  if (ShifterOperandCanAlwaysHold(other_bits) || ShifterOperandCanAlwaysHold(-other_bits)) {
+    *add_to_base = offset & ~allowed_offset_bits;
+    *offset_for_load_store = offset & allowed_offset_bits;
+    return true;
+  }
+  return false;
+}
+
+int32_t Thumb2Assembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits,
+                                               Register temp,
+                                               Register base,
+                                               int32_t offset,
+                                               Condition cond) {
+  DCHECK_NE(offset & ~allowed_offset_bits, 0);
+  int32_t add_to_base, offset_for_load;
+  if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
+    AddConstant(temp, base, add_to_base, cond, kCcKeep);
+    return offset_for_load;
+  } else {
+    LoadImmediate(temp, offset, cond);
+    add(temp, temp, ShifterOperand(base), cond, kCcKeep);
+    return 0;
+  }
+}
 
 // Implementation note: this method must emit at most one instruction when
 // Address::CanHoldLoadOffsetThumb.
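
To see the split in action, take a word load (allowed_offset_bits = 0xfff) with offset = 0x1000a4: the high part 0x100000 is itself an encodable immediate, so it moves into an ADD on the base and only 0xa4 stays in the load, which is exactly the add.w/ldr.w pairs in the LoadFromOffset expectations below. A minimal sketch:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t allowed_offset_bits = 0xfff;  // imm12, e.g. 32-bit LDR/STR word.
      int32_t offset = 0x1000a4;
      int32_t add_to_base = offset & ~allowed_offset_bits;
      int32_t offset_for_load = offset & allowed_offset_bits;
      // -> add.w temp, base, #0x100000 ; ldr.w reg, [temp, #0xa4].
      printf("add_to_base=0x%x offset_for_load=0x%x\n",
             static_cast<unsigned>(add_to_base),
             static_cast<unsigned>(offset_for_load));
      return 0;
    }

Note also that LoadFromOffset() reuses the destination register for the adjusted base where possible, which keeps IP free and can enable a 16-bit load when the destination is a low register.
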
@@ -3479,12 +3560,26 @@
                                      Condition cond) {
   if (!Address::CanHoldLoadOffsetThumb(type, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
-    base = IP;
-    offset = 0;
+    // Inlined AdjustLoadStoreOffset() allows us to pull a few more tricks.
+    int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type);
+    DCHECK_NE(offset & ~allowed_offset_bits, 0);
+    int32_t add_to_base, offset_for_load;
+    if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
+      // Use reg for the adjusted base. If it's a low reg, we may end up using a 16-bit load.
+      AddConstant(reg, base, add_to_base, cond, kCcKeep);
+      base = reg;
+      offset = offset_for_load;
+    } else {
+      Register temp = (reg == base) ? IP : reg;
+      LoadImmediate(temp, offset, cond);
+      // TODO: Implement indexed load (not available for LDRD) and use it here to avoid the ADD.
+      // Use reg for the adjusted base. If it's a low reg, we may end up using a 16-bit load.
+      add(reg, reg, ShifterOperand((reg == base) ? IP : base), cond, kCcKeep);
+      base = reg;
+      offset = 0;
+    }
   }
-  CHECK(Address::CanHoldLoadOffsetThumb(type, offset));
+  DCHECK(Address::CanHoldLoadOffsetThumb(type, offset));
   switch (type) {
     case kLoadSignedByte:
       ldrsb(reg, Address(base, offset), cond);
@@ -3510,7 +3605,6 @@
   }
 }
 
-
 // Implementation note: this method must emit at most one instruction when
 // Address::CanHoldLoadOffsetThumb, as expected by JIT::GuardedLoadFromOffset.
 void Thumb2Assembler::LoadSFromOffset(SRegister reg,
@@ -3519,12 +3613,10 @@
                                       Condition cond) {
   if (!Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
+    offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadSWord), IP, base, offset, cond);
     base = IP;
-    offset = 0;
   }
-  CHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset));
+  DCHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset));
   vldrs(reg, Address(base, offset), cond);
 }
 
@@ -3537,12 +3629,10 @@
                                       Condition cond) {
   if (!Address::CanHoldLoadOffsetThumb(kLoadDWord, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
+    offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadDWord), IP, base, offset, cond);
     base = IP;
-    offset = 0;
   }
-  CHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset));
+  DCHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset));
   vldrd(reg, Address(base, offset), cond);
 }
 
@@ -3573,12 +3663,12 @@
         offset += kRegisterSize;
       }
     }
-    LoadImmediate(tmp_reg, offset, cond);
-    add(tmp_reg, tmp_reg, ShifterOperand(base), AL);
+    // TODO: Implement indexed store (not available for STRD), inline AdjustLoadStoreOffset()
+    // and in the "unsplittable" path get rid of the "add" by using the store indexed instead.
+    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset, cond);
     base = tmp_reg;
-    offset = 0;
   }
-  CHECK(Address::CanHoldStoreOffsetThumb(type, offset));
+  DCHECK(Address::CanHoldStoreOffsetThumb(type, offset));
   switch (type) {
     case kStoreByte:
       strb(reg, Address(base, offset), cond);
@@ -3611,12 +3701,10 @@
                                      Condition cond) {
   if (!Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
+    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreSWord), IP, base, offset, cond);
     base = IP;
-    offset = 0;
   }
-  CHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset));
+  DCHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset));
   vstrs(reg, Address(base, offset), cond);
 }
 
@@ -3629,12 +3717,10 @@
                                      Condition cond) {
   if (!Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
+    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreDWord), IP, base, offset, cond);
     base = IP;
-    offset = 0;
   }
-  CHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset));
+  DCHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset));
   vstrd(reg, Address(base, offset), cond);
 }
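
For completeness, the "unsplittable" path: with offset = 0x101000, neither 0x101000 nor its negation is an encodable immediate, so AdjustLoadStoreOffset() materializes the whole offset with MOVW+MOVT and adds the base (cf. the movw/movt/add sequences in the expected listings below). A minimal sketch of the value construction, with an arbitrary base value:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t base = 0x80000000u;  // Hypothetical base register value.
      uint32_t temp = 0x1000u;      // movw temp, #0x1000.
      temp |= 0x10u << 16;          // movt temp, #0x10 -> temp = 0x101000.
      temp += base;                 // add temp, base.
      printf("%08x\n", temp);       // 80101000; the access then uses [temp, #0].
      return 0;
    }
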
 
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index e183613..bf07b2d 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -342,7 +342,9 @@
                              Register rn,
                              Opcode opcode,
                              uint32_t immediate,
+                             SetCc set_cc,
                              ShifterOperand* shifter_op) OVERRIDE;
+  using ArmAssembler::ShifterOperandCanHold;  // Don't hide the non-virtual overload.
 
   bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;
 
@@ -729,13 +731,23 @@
   void EmitBranch(Condition cond, Label* label, bool link, bool x);
   static int32_t EncodeBranchOffset(int32_t offset, int32_t inst);
   static int DecodeBranchOffset(int32_t inst);
-  int32_t EncodeTstOffset(int offset, int32_t inst);
-  int DecodeTstOffset(int32_t inst);
   void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount,
                  Condition cond = AL, SetCc set_cc = kCcDontCare);
   void EmitShift(Register rd, Register rn, Shift shift, Register rm,
                  Condition cond = AL, SetCc set_cc = kCcDontCare);
 
+  static int32_t GetAllowedLoadOffsetBits(LoadOperandType type);
+  static int32_t GetAllowedStoreOffsetBits(StoreOperandType type);
+  bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
+                               int32_t offset,
+                               /*out*/ int32_t* add_to_base,
+                               /*out*/ int32_t* offset_for_load_store);
+  int32_t AdjustLoadStoreOffset(int32_t allowed_offset_bits,
+                                Register temp,
+                                Register base,
+                                int32_t offset,
+                                Condition cond);
+
   // Whether the assembler can relocate branches. If false, unresolved branches will be
   // emitted on 32bits.
   bool can_relocate_branches_;
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index cb4b20b..7b32b0f 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -243,7 +243,7 @@
 
   const char* expected =
       "subs r1, r0, #42\n"
-      "subw r1, r0, #42\n"
+      "sub.w r1, r0, #42\n"
       "subs r1, r0, r2, asr #31\n"
       "sub r1, r0, r2, asr #31\n";
   DriverStr(expected, "sub");
@@ -257,7 +257,7 @@
 
   const char* expected =
       "adds r1, r0, #42\n"
-      "addw r1, r0, #42\n"
+      "add.w r1, r0, #42\n"
       "adds r1, r0, r2, asr #31\n"
       "add r1, r0, r2, asr #31\n";
   DriverStr(expected, "add");
@@ -305,21 +305,18 @@
   __ StoreToOffset(type, arm::IP, arm::R5, offset);
 
   const char* expected =
-      "mov ip, #4096\n"       // LoadImmediate(ip, 4096)
-      "add ip, ip, sp\n"
+      "add.w ip, sp, #4096\n"   // AddConstant(ip, sp, 4096)
       "str r0, [ip, #0]\n"
 
-      "str r5, [sp, #-4]!\n"  // Push(r5)
-      "movw r5, #4100\n"      // LoadImmediate(r5, 4096 + kRegisterSize)
-      "add r5, r5, sp\n"
-      "str ip, [r5, #0]\n"
-      "ldr r5, [sp], #4\n"    // Pop(r5)
+      "str r5, [sp, #-4]!\n"    // Push(r5)
+      "add.w r5, sp, #4096\n"   // AddConstant(r5, 4100 & ~0xfff)
+      "str ip, [r5, #4]\n"      // StoreToOffset(type, ip, r5, 4100 & 0xfff)
+      "ldr r5, [sp], #4\n"      // Pop(r5)
 
-      "str r6, [sp, #-4]!\n"  // Push(r6)
-      "mov r6, #4096\n"       // LoadImmediate(r6, 4096)
-      "add r6, r6, r5\n"
-      "str ip, [r6, #0]\n"
-      "ldr r6, [sp], #4\n";   // Pop(r6)
+      "str r6, [sp, #-4]!\n"    // Push(r6)
+      "add.w r6, r5, #4096\n"   // AddConstant(r6, r5, 4096 & ~0xfff)
+      "str ip, [r6, #0]\n"      // StoreToOffset(type, ip, r6, 4096 & 0xfff)
+      "ldr r6, [sp], #4\n";     // Pop(r6)
   DriverStr(expected, "StoreWordToNonThumbOffset");
 }
 
@@ -360,20 +357,17 @@
   __ StoreToOffset(type, arm::R11, arm::R5, offset);
 
   const char* expected =
-      "mov ip, #1024\n"           // LoadImmediate(ip, 1024)
-      "add ip, ip, sp\n"
+      "add.w ip, sp, #1024\n"     // AddConstant(ip, sp, 1024)
       "strd r0, r1, [ip, #0]\n"
 
       "str r5, [sp, #-4]!\n"      // Push(r5)
-      "movw r5, #1028\n"          // LoadImmediate(r5, 1024 + kRegisterSize)
-      "add r5, r5, sp\n"
-      "strd r11, ip, [r5, #0]\n"
+      "add.w r5, sp, #1024\n"     // AddConstant(r5, sp, (1024 + kRegisterSize) & ~0x3fc)
+      "strd r11, ip, [r5, #4]\n"  // StoreToOffset(type, r11, sp, (1024 + kRegisterSize) & 0x3fc)
       "ldr r5, [sp], #4\n"        // Pop(r5)
 
       "str r6, [sp, #-4]!\n"      // Push(r6)
-      "mov r6, #1024\n"           // LoadImmediate(r6, 1024)
-      "add r6, r6, r5\n"
-      "strd r11, ip, [r6, #0]\n"
+      "add.w r6, r5, #1024\n"     // AddConstant(r6, r5, 1024 & ~0x3fc)
+      "strd r11, ip, [r6, #0]\n"  // StoreToOffset(type, r11, r6, 1024 & 0x3fc)
       "ldr r6, [sp], #4\n";       // Pop(r6)
   DriverStr(expected, "StoreWordPairToNonThumbOffset");
 }
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index f1233ca..2579ddb 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -147,7 +147,7 @@
                                               std::string (AssemblerTest::*GetName2)(const Reg2&),
                                               std::string fmt) {
     std::string str;
-    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), imm_bits > 0);
+    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
 
     for (auto reg1 : reg1_registers) {
       for (auto reg2 : reg2_registers) {
@@ -188,14 +188,66 @@
     return str;
   }
 
+  template <typename ImmType, typename Reg1, typename Reg2>
+  std::string RepeatTemplatedImmBitsRegisters(void (Ass::*f)(ImmType, Reg1, Reg2),
+                                              const std::vector<Reg1*> reg1_registers,
+                                              const std::vector<Reg2*> reg2_registers,
+                                              std::string (AssemblerTest::*GetName1)(const Reg1&),
+                                              std::string (AssemblerTest::*GetName2)(const Reg2&),
+                                              int imm_bits,
+                                              std::string fmt) {
+    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
+
+    WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size());
+
+    std::string str;
+    for (auto reg1 : reg1_registers) {
+      for (auto reg2 : reg2_registers) {
+        for (int64_t imm : imms) {
+          ImmType new_imm = CreateImmediate(imm);
+          (assembler_.get()->*f)(new_imm, *reg1, *reg2);
+          std::string base = fmt;
+
+          std::string reg1_string = (this->*GetName1)(*reg1);
+          size_t reg1_index;
+          while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
+            base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
+          }
+
+          std::string reg2_string = (this->*GetName2)(*reg2);
+          size_t reg2_index;
+          while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
+            base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
+          }
+
+          size_t imm_index = base.find(IMM_TOKEN);
+          if (imm_index != std::string::npos) {
+            std::ostringstream sreg;
+            sreg << imm;
+            std::string imm_string = sreg.str();
+            base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
+          }
+
+          if (str.size() > 0) {
+            str += "\n";
+          }
+          str += base;
+        }
+      }
+    }
+    // Add a newline at the end.
+    str += "\n";
+    return str;
+  }
+
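
RepeatTemplatedImmBitsRegisters() reuses the CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0)) convention of the existing helpers: as used by these tests, a positive imm_bits requests unsigned n-bit immediates and a negative one signed n-bit immediates. A minimal sketch of the ranges that convention implies (the actual sampling is up to CreateImmediateValuesBits; this only illustrates the sign trick):

    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    int main() {
      int imm_bits = -5;  // Negative: signed 5-bit immediates.
      int n = abs(imm_bits);
      bool as_uint = imm_bits > 0;
      int64_t lo = as_uint ? 0 : -(INT64_C(1) << (n - 1));
      int64_t hi = (as_uint ? (INT64_C(1) << n) : (INT64_C(1) << (n - 1))) - 1;
      printf("[%lld, %lld]\n", (long long)lo, (long long)hi);  // [-16, 15].
      return 0;
    }
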
   template <typename RegType, typename ImmType>
   std::string RepeatTemplatedRegisterImmBits(void (Ass::*f)(RegType, ImmType),
-                                              int imm_bits,
-                                              const std::vector<Reg*> registers,
-                                              std::string (AssemblerTest::*GetName)(const RegType&),
-                                              std::string fmt) {
+                                             int imm_bits,
+                                             const std::vector<Reg*> registers,
+                                             std::string (AssemblerTest::*GetName)(const RegType&),
+                                             std::string fmt) {
     std::string str;
-    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), imm_bits > 0);
+    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
 
     for (auto reg : registers) {
       for (int64_t imm : imms) {
@@ -291,6 +343,17 @@
                                                      fmt);
   }
 
+  template <typename ImmType>
+  std::string RepeatIbFF(void (Ass::*f)(ImmType, FPReg, FPReg), int imm_bits, std::string fmt) {
+    return RepeatTemplatedImmBitsRegisters<ImmType, FPReg, FPReg>(f,
+                                                                  GetFPRegisters(),
+                                                                  GetFPRegisters(),
+                                                                  &AssemblerTest::GetFPRegName,
+                                                                  &AssemblerTest::GetFPRegName,
+                                                                  imm_bits,
+                                                                  fmt);
+  }
+
   std::string RepeatFR(void (Ass::*f)(FPReg, Reg), std::string fmt) {
     return RepeatTemplatedRegisters<FPReg, Reg>(f,
         GetFPRegisters(),
@@ -840,12 +903,17 @@
     return str;
   }
 
+  // Override this to pad the code with NOPs to a certain size if needed.
+  virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) {
+  }
+
   void DriverWrapper(std::string assembly_text, std::string test_name) {
     assembler_->FinalizeCode();
     size_t cs = assembler_->CodeSize();
     std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
     MemoryRegion code(&(*data)[0], data->size());
     assembler_->FinalizeInstructions(code);
+    Pad(*data);
     test_helper_->Driver(*data, assembly_text, test_name);
   }
 
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 2ae8841..0ef0dc1 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -135,7 +135,8 @@
     toolsdir.c_str(), filename);
   if (kPrintResults) {
     // Print the results only, don't check. This is used to generate new output for inserting
-    // into the .inc file.
+    // into the .inc file, so let's add the appropriate prefix/suffix needed in the C++ code.
+    strcat(cmd, " | sed '-es/^/  \"/' | sed '-es/$/\\\\n\",/'");
     int cmd_result3 = system(cmd);
     ASSERT_EQ(cmd_result3, 0) << strerror(errno);
   } else {
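
The sed pipeline above turns each objdump line into a ready-to-paste C string literal for the .inc file: it prefixes two spaces and an opening quote and appends \n",. A minimal sketch of the same transformation:

    #include <cstdio>
    #include <string>

    int main() {
      std::string line = "   0:\t2055      \tmovs\tr0, #85\t; 0x55";
      // sed 's/^/  "/' then sed 's/$/\\n",/':
      std::string inc_line = "  \"" + line + "\\n\",";
      printf("%s\n", inc_line.c_str());
      return 0;
    }
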
@@ -466,6 +467,38 @@
   EmitAndCheck(&assembler, "DataProcessingShiftedRegister");
 }
 
+TEST(Thumb2AssemblerTest, ShiftImmediate) {
+  // Note: This test produces the same results as DataProcessingShiftedRegister
+  // but it does so using shift functions instead of mov().
+  arm::Thumb2Assembler assembler;
+
+  // 16-bit variants.
+  __ Lsl(R3, R4, 4);
+  __ Lsr(R3, R4, 5);
+  __ Asr(R3, R4, 6);
+
+  // 32-bit ROR because ROR immediate, unlike the other shifts, has no 16-bit version.
+  __ Ror(R3, R4, 7);
+
+  // 32-bit RRX because RRX has no 16-bit version.
+  __ Rrx(R3, R4);
+
+  // 32 bit variants (not setting condition codes).
+  __ Lsl(R3, R4, 4, AL, kCcKeep);
+  __ Lsr(R3, R4, 5, AL, kCcKeep);
+  __ Asr(R3, R4, 6, AL, kCcKeep);
+  __ Ror(R3, R4, 7, AL, kCcKeep);
+  __ Rrx(R3, R4, AL, kCcKeep);
+
+  // 32 bit variants (high registers).
+  __ Lsls(R8, R4, 4);
+  __ Lsrs(R8, R4, 5);
+  __ Asrs(R8, R4, 6);
+  __ Rors(R8, R4, 7);
+  __ Rrxs(R8, R4);
+
+  EmitAndCheck(&assembler, "ShiftImmediate");
+}
 
 TEST(Thumb2AssemblerTest, BasicLoad) {
   arm::Thumb2Assembler assembler;
@@ -800,11 +833,12 @@
 TEST(Thumb2AssemblerTest, MovWMovT) {
   arm::Thumb2Assembler assembler;
 
-  __ movw(R4, 0);         // 16 bit.
-  __ movw(R4, 0x34);      // 16 bit.
-  __ movw(R9, 0x34);      // 32 bit due to high register.
-  __ movw(R3, 0x1234);    // 32 bit due to large value.
-  __ movw(R9, 0xffff);    // 32 bit due to large value and high register.
+  // Always 32 bit.
+  __ movw(R4, 0);
+  __ movw(R4, 0x34);
+  __ movw(R9, 0x34);
+  __ movw(R3, 0x1234);
+  __ movw(R9, 0xffff);
 
   // Always 32 bit.
   __ movt(R0, 0);
@@ -823,29 +857,80 @@
 
   __ add(R2, SP, ShifterOperand(0xf00));  // 32 bit due to imm size.
   __ add(SP, SP, ShifterOperand(0xf00));  // 32 bit due to imm size.
+  __ add(SP, SP, ShifterOperand(0xffc));  // 32 bit due to imm size; encoding T4.
 
-  __ sub(SP, SP, ShifterOperand(0x50));     // 16 bit
-  __ sub(R0, SP, ShifterOperand(0x50));     // 32 bit
-  __ sub(R8, SP, ShifterOperand(0x50));     // 32 bit.
+  __ sub(SP, SP, ShifterOperand(0x50));   // 16 bit
+  __ sub(R0, SP, ShifterOperand(0x50));   // 32 bit
+  __ sub(R8, SP, ShifterOperand(0x50));   // 32 bit.
 
-  __ sub(SP, SP, ShifterOperand(0xf00));   // 32 bit due to imm size
+  __ sub(SP, SP, ShifterOperand(0xf00));  // 32 bit due to imm size
+  __ sub(SP, SP, ShifterOperand(0xffc));  // 32 bit due to imm size; encoding T4.
 
   EmitAndCheck(&assembler, "SpecialAddSub");
 }
 
+TEST(Thumb2AssemblerTest, LoadFromOffset) {
+  arm::Thumb2Assembler assembler;
+
+  __ LoadFromOffset(kLoadWord, R2, R4, 12);
+  __ LoadFromOffset(kLoadWord, R2, R4, 0xfff);
+  __ LoadFromOffset(kLoadWord, R2, R4, 0x1000);
+  __ LoadFromOffset(kLoadWord, R2, R4, 0x1000a4);
+  __ LoadFromOffset(kLoadWord, R2, R4, 0x101000);
+  __ LoadFromOffset(kLoadWord, R4, R4, 0x101000);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 12);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0xfff);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000a4);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x101000);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R4, R4, 0x101000);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 12);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x3fc);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400a4);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x40400);
+  __ LoadFromOffset(kLoadWordPair, R4, R4, 0x40400);
+
+  __ LoadFromOffset(kLoadWord, R0, R12, 12);  // 32-bit because of R12.
+  __ LoadFromOffset(kLoadWord, R2, R4, 0xa4 - 0x100000);
+
+  __ LoadFromOffset(kLoadSignedByte, R2, R4, 12);
+  __ LoadFromOffset(kLoadUnsignedByte, R2, R4, 12);
+  __ LoadFromOffset(kLoadSignedHalfword, R2, R4, 12);
+
+  EmitAndCheck(&assembler, "LoadFromOffset");
+}
+
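
The word-pair cases exercise the narrower imm8:'00' range: GetAllowedLoadOffsetBits(kLoadWordPair) is 0xff << 2 == 0x3fc, so for offset 0x400a4 only 0xa4 can stay in the LDRD and 0x40000 moves into an ADD (cf. the LoadFromOffset expected listing below). A minimal sketch:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t allowed_offset_bits = 0xff << 2;  // 0x3fc: imm8:'00' for LDRD/STRD.
      int32_t offset = 0x400a4;
      // -> add.w temp, base, #0x40000 ; ldrd r2, r3, [temp, #0xa4].
      printf("add_to_base=0x%x offset_for_load=0x%x\n",
             static_cast<unsigned>(offset & ~allowed_offset_bits),
             static_cast<unsigned>(offset & allowed_offset_bits));
      return 0;
    }
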
 TEST(Thumb2AssemblerTest, StoreToOffset) {
   arm::Thumb2Assembler assembler;
 
-  __ StoreToOffset(kStoreWord, R2, R4, 12);     // Simple
-  __ StoreToOffset(kStoreWord, R2, R4, 0x2000);     // Offset too big.
-  __ StoreToOffset(kStoreWord, R0, R12, 12);
-  __ StoreToOffset(kStoreHalfword, R0, R12, 12);
-  __ StoreToOffset(kStoreByte, R2, R12, 12);
+  __ StoreToOffset(kStoreWord, R2, R4, 12);
+  __ StoreToOffset(kStoreWord, R2, R4, 0xfff);
+  __ StoreToOffset(kStoreWord, R2, R4, 0x1000);
+  __ StoreToOffset(kStoreWord, R2, R4, 0x1000a4);
+  __ StoreToOffset(kStoreWord, R2, R4, 0x101000);
+  __ StoreToOffset(kStoreWord, R4, R4, 0x101000);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 12);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 0xfff);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000a4);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 0x101000);
+  __ StoreToOffset(kStoreHalfword, R4, R4, 0x101000);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 12);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 0x3fc);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 0x400);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 0x400a4);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 0x40400);
+  __ StoreToOffset(kStoreWordPair, R4, R4, 0x40400);
+
+  __ StoreToOffset(kStoreWord, R0, R12, 12);  // 32-bit because of R12.
+  __ StoreToOffset(kStoreWord, R2, R4, 0xa4 - 0x100000);
+
+  __ StoreToOffset(kStoreByte, R2, R4, 12);
 
   EmitAndCheck(&assembler, "StoreToOffset");
 }
 
-
 TEST(Thumb2AssemblerTest, IfThen) {
   arm::Thumb2Assembler assembler;
 
@@ -1295,6 +1380,252 @@
   EmitAndCheck(&assembler, "CompareAndBranch");
 }
 
+TEST(Thumb2AssemblerTest, AddConstant) {
+  arm::Thumb2Assembler assembler;
+
+  // Low registers, Rd != Rn.
+  __ AddConstant(R0, R1, 0);                          // MOV.
+  __ AddConstant(R0, R1, 1);                          // 16-bit ADDS, encoding T1.
+  __ AddConstant(R0, R1, 7);                          // 16-bit ADDS, encoding T1.
+  __ AddConstant(R0, R1, 8);                          // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 255);                        // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 256);                        // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 257);                        // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R1, 0xfff);                      // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R1, 0x1000);                     // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x1001);                     // MVN+SUB.
+  __ AddConstant(R0, R1, 0x1002);                     // MOVW+ADD.
+  __ AddConstant(R0, R1, 0xffff);                     // MOVW+ADD.
+  __ AddConstant(R0, R1, 0x10000);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x10001);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x10002);                    // MVN+SUB.
+  __ AddConstant(R0, R1, 0x10003);                    // MOVW+MOVT+ADD.
+  __ AddConstant(R0, R1, -1);                         // 16-bit SUBS.
+  __ AddConstant(R0, R1, -7);                         // 16-bit SUBS.
+  __ AddConstant(R0, R1, -8);                         // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -255);                       // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -256);                       // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -257);                       // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R1, -0xfff);                     // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R1, -0x1000);                    // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x1001);                    // MVN+ADD.
+  __ AddConstant(R0, R1, -0x1002);                    // MOVW+SUB.
+  __ AddConstant(R0, R1, -0xffff);                    // MOVW+SUB.
+  __ AddConstant(R0, R1, -0x10000);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x10001);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x10002);                   // MVN+ADD.
+  __ AddConstant(R0, R1, -0x10003);                   // MOVW+MOVT+ADD.
+
+  // Low registers, Rd == Rn.
+  __ AddConstant(R0, R0, 0);                          // Nothing.
+  __ AddConstant(R1, R1, 1);                          // 16-bit ADDS, encoding T2.
+  __ AddConstant(R0, R0, 7);                          // 16-bit ADDS, encoding T2.
+  __ AddConstant(R1, R1, 8);                          // 16-bit ADDS, encoding T2.
+  __ AddConstant(R0, R0, 255);                        // 16-bit ADDS, encoding T2.
+  __ AddConstant(R1, R1, 256);                        // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 257);                        // 32-bit ADD, encoding T4.
+  __ AddConstant(R1, R1, 0xfff);                      // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R0, 0x1000);                     // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 0x1001);                     // MVN+SUB.
+  __ AddConstant(R0, R0, 0x1002);                     // MOVW+ADD.
+  __ AddConstant(R1, R1, 0xffff);                     // MOVW+ADD.
+  __ AddConstant(R0, R0, 0x10000);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 0x10001);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 0x10002);                    // MVN+SUB.
+  __ AddConstant(R1, R1, 0x10003);                    // MOVW+MOVT+ADD.
+  __ AddConstant(R0, R0, -1);                         // 16-bit SUBS, encoding T2.
+  __ AddConstant(R1, R1, -7);                         // 16-bit SUBS, encoding T2.
+  __ AddConstant(R0, R0, -8);                         // 16-bit SUBS, encoding T2.
+  __ AddConstant(R1, R1, -255);                       // 16-bit SUBS, encoding T2.
+  __ AddConstant(R0, R0, -256);                       // 32-bit SUB, encoding T3.
+  __ AddConstant(R1, R1, -257);                       // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R0, -0xfff);                     // 32-bit SUB, encoding T4.
+  __ AddConstant(R1, R1, -0x1000);                    // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -0x1001);                    // MVN+ADD.
+  __ AddConstant(R1, R1, -0x1002);                    // MOVW+SUB.
+  __ AddConstant(R0, R0, -0xffff);                    // MOVW+SUB.
+  __ AddConstant(R1, R1, -0x10000);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -0x10001);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R1, R1, -0x10002);                   // MVN+ADD.
+  __ AddConstant(R0, R0, -0x10003);                   // MOVW+MOVT+ADD.
+
+  // High registers.
+  __ AddConstant(R8, R8, 0);                          // Nothing.
+  __ AddConstant(R8, R1, 1);                          // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R8, 7);                          // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R8, 8);                          // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R1, 255);                        // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R8, 256);                        // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R8, 257);                        // 32-bit ADD, encoding T4.
+  __ AddConstant(R8, R1, 0xfff);                      // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R8, 0x1000);                     // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R8, 0x1001);                     // MVN+SUB.
+  __ AddConstant(R0, R1, 0x1002);                     // MOVW+ADD.
+  __ AddConstant(R0, R8, 0xffff);                     // MOVW+ADD.
+  __ AddConstant(R8, R8, 0x10000);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R1, 0x10001);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R8, 0x10002);                    // MVN+SUB.
+  __ AddConstant(R0, R8, 0x10003);                    // MOVW+MOVT+ADD.
+  __ AddConstant(R8, R8, -1);                         // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R1, -7);                         // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R8, -8);                         // 32-bit SUB, encoding T3.
+  __ AddConstant(R8, R8, -255);                       // 32-bit SUB, encoding T3.
+  __ AddConstant(R8, R1, -256);                       // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R8, -257);                       // 32-bit SUB, encoding T4.
+  __ AddConstant(R8, R8, -0xfff);                     // 32-bit SUB, encoding T4.
+  __ AddConstant(R8, R1, -0x1000);                    // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R8, -0x1001);                    // MVN+ADD.
+  __ AddConstant(R0, R1, -0x1002);                    // MOVW+SUB.
+  __ AddConstant(R8, R1, -0xffff);                    // MOVW+SUB.
+  __ AddConstant(R0, R8, -0x10000);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R8, R8, -0x10001);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R8, R1, -0x10002);                   // MVN+ADD.
+  __ AddConstant(R0, R8, -0x10003);                   // MOVW+MOVT+ADD.
+
+  // Low registers, Rd != Rn, kCcKeep.
+  __ AddConstant(R0, R1, 0, AL, kCcKeep);             // MOV.
+  __ AddConstant(R0, R1, 1, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 7, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 8, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 255, AL, kCcKeep);           // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 256, AL, kCcKeep);           // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 257, AL, kCcKeep);           // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R1, 0xfff, AL, kCcKeep);         // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R1, 0x1000, AL, kCcKeep);        // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x1001, AL, kCcKeep);        // MVN+SUB.
+  __ AddConstant(R0, R1, 0x1002, AL, kCcKeep);        // MOVW+ADD.
+  __ AddConstant(R0, R1, 0xffff, AL, kCcKeep);        // MOVW+ADD.
+  __ AddConstant(R0, R1, 0x10000, AL, kCcKeep);       // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x10001, AL, kCcKeep);       // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x10002, AL, kCcKeep);       // MVN+SUB.
+  __ AddConstant(R0, R1, 0x10003, AL, kCcKeep);       // MOVW+MOVT+ADD.
+  __ AddConstant(R0, R1, -1, AL, kCcKeep);            // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, -7, AL, kCcKeep);            // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -8, AL, kCcKeep);            // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -255, AL, kCcKeep);          // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -256, AL, kCcKeep);          // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -257, AL, kCcKeep);          // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R1, -0xfff, AL, kCcKeep);        // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R1, -0x1000, AL, kCcKeep);       // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x1001, AL, kCcKeep);       // MVN+ADD.
+  __ AddConstant(R0, R1, -0x1002, AL, kCcKeep);       // MOVW+SUB.
+  __ AddConstant(R0, R1, -0xffff, AL, kCcKeep);       // MOVW+SUB.
+  __ AddConstant(R0, R1, -0x10000, AL, kCcKeep);      // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x10001, AL, kCcKeep);      // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x10002, AL, kCcKeep);      // MVN+ADD.
+  __ AddConstant(R0, R1, -0x10003, AL, kCcKeep);      // MOVW+MOVT+ADD.
+
+  // Low registers, Rd == Rn, kCcKeep.
+  __ AddConstant(R0, R0, 0, AL, kCcKeep);             // Nothing.
+  __ AddConstant(R1, R1, 1, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 7, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 8, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 255, AL, kCcKeep);           // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 256, AL, kCcKeep);           // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 257, AL, kCcKeep);           // 32-bit ADD, encoding T4.
+  __ AddConstant(R1, R1, 0xfff, AL, kCcKeep);         // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R0, 0x1000, AL, kCcKeep);        // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 0x1001, AL, kCcKeep);        // MVN+SUB.
+  __ AddConstant(R0, R0, 0x1002, AL, kCcKeep);        // MOVW+ADD.
+  __ AddConstant(R1, R1, 0xffff, AL, kCcKeep);        // MOVW+ADD.
+  __ AddConstant(R0, R0, 0x10000, AL, kCcKeep);       // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 0x10001, AL, kCcKeep);       // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 0x10002, AL, kCcKeep);       // MVN+SUB.
+  __ AddConstant(R1, R1, 0x10003, AL, kCcKeep);       // MOVW+MOVT+ADD.
+  __ AddConstant(R0, R0, -1, AL, kCcKeep);            // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, -7, AL, kCcKeep);            // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -8, AL, kCcKeep);            // 32-bit SUB, encoding T3.
+  __ AddConstant(R1, R1, -255, AL, kCcKeep);          // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -256, AL, kCcKeep);          // 32-bit SUB, encoding T3.
+  __ AddConstant(R1, R1, -257, AL, kCcKeep);          // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R0, -0xfff, AL, kCcKeep);        // 32-bit SUB, encoding T4.
+  __ AddConstant(R1, R1, -0x1000, AL, kCcKeep);       // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -0x1001, AL, kCcKeep);       // MVN+ADD.
+  __ AddConstant(R1, R1, -0x1002, AL, kCcKeep);       // MOVW+SUB.
+  __ AddConstant(R0, R0, -0xffff, AL, kCcKeep);       // MOVW+SUB.
+  __ AddConstant(R1, R1, -0x10000, AL, kCcKeep);      // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -0x10001, AL, kCcKeep);      // 32-bit SUB, encoding T3.
+  __ AddConstant(R1, R1, -0x10002, AL, kCcKeep);      // MVN+ADD.
+  __ AddConstant(R0, R0, -0x10003, AL, kCcKeep);      // MOVW+MOVT+ADD.
+
+  // Low registers, Rd != Rn, kCcSet.
+  __ AddConstant(R0, R1, 0, AL, kCcSet);              // 16-bit ADDS.
+  __ AddConstant(R0, R1, 1, AL, kCcSet);              // 16-bit ADDS.
+  __ AddConstant(R0, R1, 7, AL, kCcSet);              // 16-bit ADDS.
+  __ AddConstant(R0, R1, 8, AL, kCcSet);              // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 255, AL, kCcSet);            // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 256, AL, kCcSet);            // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 257, AL, kCcSet);            // MVN+SUBS.
+  __ AddConstant(R0, R1, 0xfff, AL, kCcSet);          // MOVW+ADDS.
+  __ AddConstant(R0, R1, 0x1000, AL, kCcSet);         // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 0x1001, AL, kCcSet);         // MVN+SUBS.
+  __ AddConstant(R0, R1, 0x1002, AL, kCcSet);         // MOVW+ADDS.
+  __ AddConstant(R0, R1, 0xffff, AL, kCcSet);         // MOVW+ADDS.
+  __ AddConstant(R0, R1, 0x10000, AL, kCcSet);        // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 0x10001, AL, kCcSet);        // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 0x10002, AL, kCcSet);        // MVN+SUBS.
+  __ AddConstant(R0, R1, 0x10003, AL, kCcSet);        // MOVW+MOVT+ADDS.
+  __ AddConstant(R0, R1, -1, AL, kCcSet);             // 16-bit SUBS.
+  __ AddConstant(R0, R1, -7, AL, kCcSet);             // 16-bit SUBS.
+  __ AddConstant(R0, R1, -8, AL, kCcSet);             // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -255, AL, kCcSet);           // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -256, AL, kCcSet);           // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -257, AL, kCcSet);           // MVN+ADDS.
+  __ AddConstant(R0, R1, -0xfff, AL, kCcSet);         // MOVW+SUBS.
+  __ AddConstant(R0, R1, -0x1000, AL, kCcSet);        // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -0x1001, AL, kCcSet);        // MVN+ADDS.
+  __ AddConstant(R0, R1, -0x1002, AL, kCcSet);        // MOVW+SUBS.
+  __ AddConstant(R0, R1, -0xffff, AL, kCcSet);        // MOVW+SUBS.
+  __ AddConstant(R0, R1, -0x10000, AL, kCcSet);       // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -0x10001, AL, kCcSet);       // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -0x10002, AL, kCcSet);       // MVN+ADDS.
+  __ AddConstant(R0, R1, -0x10003, AL, kCcSet);       // MOVW+MOVT+ADDS.
+
+  // Low registers, Rd == Rn, kCcSet.
+  __ AddConstant(R0, R0, 0, AL, kCcSet);              // 16-bit ADDS, encoding T2.
+  __ AddConstant(R1, R1, 1, AL, kCcSet);              // 16-bit ADDS, encoding T2.
+  __ AddConstant(R0, R0, 7, AL, kCcSet);              // 16-bit ADDS, encoding T2.
+  __ AddConstant(R1, R1, 8, AL, kCcSet);              // 16-bit ADDS, encoding T2.
+  __ AddConstant(R0, R0, 255, AL, kCcSet);            // 16-bit ADDS, encoding T2.
+  __ AddConstant(R1, R1, 256, AL, kCcSet);            // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R0, 257, AL, kCcSet);            // MVN+SUBS.
+  __ AddConstant(R1, R1, 0xfff, AL, kCcSet);          // MOVW+ADDS.
+  __ AddConstant(R0, R0, 0x1000, AL, kCcSet);         // 32-bit ADDS, encoding T3.
+  __ AddConstant(R1, R1, 0x1001, AL, kCcSet);         // MVN+SUBS.
+  __ AddConstant(R0, R0, 0x1002, AL, kCcSet);         // MOVW+ADDS.
+  __ AddConstant(R1, R1, 0xffff, AL, kCcSet);         // MOVW+ADDS.
+  __ AddConstant(R0, R0, 0x10000, AL, kCcSet);        // 32-bit ADDS, encoding T3.
+  __ AddConstant(R1, R1, 0x10001, AL, kCcSet);        // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R0, 0x10002, AL, kCcSet);        // MVN+SUBS.
+  __ AddConstant(R1, R1, 0x10003, AL, kCcSet);        // MOVW+MOVT+ADDS.
+  __ AddConstant(R0, R0, -1, AL, kCcSet);             // 16-bit SUBS, encoding T2.
+  __ AddConstant(R1, R1, -7, AL, kCcSet);             // 16-bit SUBS, encoding T2.
+  __ AddConstant(R0, R0, -8, AL, kCcSet);             // 16-bit SUBS, encoding T2.
+  __ AddConstant(R1, R1, -255, AL, kCcSet);           // 16-bit SUBS, encoding T2.
+  __ AddConstant(R0, R0, -256, AL, kCcSet);           // 32-bit SUBS, encoding T3.
+  __ AddConstant(R1, R1, -257, AL, kCcSet);           // MVN+ADDS.
+  __ AddConstant(R0, R0, -0xfff, AL, kCcSet);         // MOVW+SUBS.
+  __ AddConstant(R1, R1, -0x1000, AL, kCcSet);        // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R0, -0x1001, AL, kCcSet);        // MVN+ADDS.
+  __ AddConstant(R1, R1, -0x1002, AL, kCcSet);        // MOVW+SUBS.
+  __ AddConstant(R0, R0, -0xffff, AL, kCcSet);        // MOVW+SUBS.
+  __ AddConstant(R1, R1, -0x10000, AL, kCcSet);       // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R0, -0x10001, AL, kCcSet);       // 32-bit SUBS, encoding T3.
+  __ AddConstant(R1, R1, -0x10002, AL, kCcSet);       // MVN+ADDS.
+  __ AddConstant(R0, R0, -0x10003, AL, kCcSet);       // MOVW+MOVT+ADDS.
+
+  __ it(EQ);
+  __ AddConstant(R0, R1, 1, EQ, kCcSet);              // 32-bit ADDS, encoding T3.
+  __ it(NE);
+  __ AddConstant(R0, R1, 1, NE, kCcKeep);             // 16-bit ADDS, encoding T1.
+  __ it(GE);
+  __ AddConstant(R0, R0, 1, GE, kCcSet);              // 32-bit ADDS, encoding T3.
+  __ it(LE);
+  __ AddConstant(R0, R0, 1, LE, kCcKeep);             // 16-bit ADDS, encoding T2.
+
+  EmitAndCheck(&assembler, "AddConstant");
+}
+
 #undef __
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index b79c2e4..f07f8c7 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -132,8 +132,8 @@
 const char* DataProcessingImmediateResults[] = {
   "   0:	2055      	movs	r0, #85	; 0x55\n",
   "   2:	f06f 0055 	mvn.w	r0, #85	; 0x55\n",
-  "   6:	f201 0055 	addw	r0, r1, #85	; 0x55\n",
-  "   a:	f2a1 0055 	subw	r0, r1, #85	; 0x55\n",
+  "   6:	f101 0055 	add.w	r0, r1, #85	; 0x55\n",
+  "   a:	f1a1 0055 	sub.w	r0, r1, #85	; 0x55\n",
   "   e:	f001 0055 	and.w	r0, r1, #85	; 0x55\n",
   "  12:	f041 0055 	orr.w	r0, r1, #85	; 0x55\n",
   "  16:	f061 0055 	orn	r0, r1, #85	; 0x55\n",
@@ -201,6 +201,24 @@
   "  32:	ea5f 0834 	movs.w	r8, r4, rrx\n",
   nullptr
 };
+const char* ShiftImmediateResults[] = {
+  "   0:  0123        lsls  r3, r4, #4\n",
+  "   2:  0963        lsrs  r3, r4, #5\n",
+  "   4:  11a3        asrs  r3, r4, #6\n",
+  "   6:  ea4f 13f4   mov.w  r3, r4, ror #7\n",
+  "   a:  ea4f 0334   mov.w  r3, r4, rrx\n",
+  "   e:  ea4f 1304   mov.w r3, r4, lsl #4\n",
+  "  12:  ea4f 1354   mov.w r3, r4, lsr #5\n",
+  "  16:  ea4f 13a4   mov.w r3, r4, asr #6\n",
+  "  1a:  ea4f 13f4   mov.w r3, r4, ror #7\n",
+  "  1e:  ea4f 0334   mov.w r3, r4, rrx\n",
+  "  22:  ea5f 1804   movs.w  r8, r4, lsl #4\n",
+  "  26:  ea5f 1854   movs.w  r8, r4, lsr #5\n",
+  "  2a:  ea5f 18a4   movs.w  r8, r4, asr #6\n",
+  "  2e:  ea5f 18f4   movs.w  r8, r4, ror #7\n",
+  "  32:  ea5f 0834   movs.w  r8, r4, rrx\n",
+  nullptr
+};
 const char* BasicLoadResults[] = {
   "   0:	69a3      	ldr	r3, [r4, #24]\n",
   "   2:	7e23      	ldrb	r3, [r4, #24]\n",
@@ -421,36 +439,128 @@
   nullptr
 };
 const char* MovWMovTResults[] = {
-  "   0:	2400      	movs	r4, #0\n",
-  "   2:	2434      	movs	r4, #52	; 0x34\n",
-  "   4:	f240 0934 	movw	r9, #52	; 0x34\n",
-  "   8:	f241 2334 	movw	r3, #4660	; 0x1234\n",
-  "   c:	f64f 79ff 	movw	r9, #65535	; 0xffff\n",
-  "  10:	f2c0 0000 	movt	r0, #0\n",
-  "  14:	f2c1 2034 	movt	r0, #4660	; 0x1234\n",
-  "  18:	f6cf 71ff 	movt	r1, #65535	; 0xffff\n",
+  "   0:	f240 0400 	movw  r4, #0\n",
+  "   4:	f240 0434 	movw  r4, #52 ; 0x34\n",
+  "   8:	f240 0934 	movw	r9, #52	; 0x34\n",
+  "   c:	f241 2334 	movw	r3, #4660	; 0x1234\n",
+  "  10:	f64f 79ff 	movw	r9, #65535	; 0xffff\n",
+  "  14:	f2c0 0000 	movt	r0, #0\n",
+  "  18:	f2c1 2034 	movt	r0, #4660	; 0x1234\n",
+  "  1c:	f6cf 71ff 	movt	r1, #65535	; 0xffff\n",
   nullptr
 };
 const char* SpecialAddSubResults[] = {
   "   0:	aa14      	add	r2, sp, #80	; 0x50\n",
   "   2:	b014      	add	sp, #80		; 0x50\n",
-  "   4:	f20d 0850 	addw	r8, sp, #80	; 0x50\n",
-  "   8:	f60d 7200 	addw	r2, sp, #3840	; 0xf00\n",
-  "   c:	f60d 7d00 	addw	sp, sp, #3840	; 0xf00\n",
-  "  10:	b094      	sub	sp, #80		; 0x50\n",
-  "  12:	f2ad 0050 	subw	r0, sp, #80	; 0x50\n",
-  "  16:	f2ad 0850 	subw	r8, sp, #80	; 0x50\n",
-  "  1a:	f6ad 7d00 	subw	sp, sp, #3840	; 0xf00\n",
+  "   4:	f10d 0850 	add.w	r8, sp, #80	; 0x50\n",
+  "   8:	f50d 6270 	add.w	r2, sp, #3840	; 0xf00\n",
+  "   c:	f50d 6d70 	add.w	sp, sp, #3840	; 0xf00\n",
+  "  10:	f60d 7dfc 	addw	sp, sp, #4092	; 0xffc\n",
+  "  14:	b094      	sub	sp, #80		; 0x50\n",
+  "  16:	f1ad 0050 	sub.w	r0, sp, #80	; 0x50\n",
+  "  1a:	f1ad 0850 	sub.w	r8, sp, #80	; 0x50\n",
+  "  1e:	f5ad 6d70 	sub.w	sp, sp, #3840	; 0xf00\n",
+  "  22:	f6ad 7dfc 	subw	sp, sp, #4092	; 0xffc\n",
+  nullptr
+};
+const char* LoadFromOffsetResults[] = {
+  "   0:	68e2      	ldr	r2, [r4, #12]\n",
+  "   2:	f8d4 2fff 	ldr.w	r2, [r4, #4095]	; 0xfff\n",
+  "   6:	f504 5280 	add.w	r2, r4, #4096	; 0x1000\n",
+  "   a:	6812      	ldr	r2, [r2, #0]\n",
+  "   c:	f504 1280 	add.w	r2, r4, #1048576	; 0x100000\n",
+  "  10:	f8d2 20a4 	ldr.w	r2, [r2, #164]	; 0xa4\n",
+  "  14:	f241 0200 	movw	r2, #4096	; 0x1000\n",
+  "  18:	f2c0 0210 	movt	r2, #16\n",
+  "  1c:	4422      	add	r2, r4\n",
+  "  1e:	6812      	ldr	r2, [r2, #0]\n",
+  "  20:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  24:	f2c0 0c10 	movt	ip, #16\n",
+  "  28:	4464      	add	r4, ip\n",
+  "  2a:	6824      	ldr	r4, [r4, #0]\n",
+  "  2c:	89a2      	ldrh	r2, [r4, #12]\n",
+  "  2e:	f8b4 2fff 	ldrh.w	r2, [r4, #4095]	; 0xfff\n",
+  "  32:	f504 5280 	add.w	r2, r4, #4096	; 0x1000\n",
+  "  36:	8812      	ldrh	r2, [r2, #0]\n",
+  "  38:	f504 1280 	add.w	r2, r4, #1048576	; 0x100000\n",
+  "  3c:	f8b2 20a4 	ldrh.w	r2, [r2, #164]	; 0xa4\n",
+  "  40:	f241 0200 	movw	r2, #4096	; 0x1000\n",
+  "  44:	f2c0 0210 	movt	r2, #16\n",
+  "  48:	4422      	add	r2, r4\n",
+  "  4a:	8812      	ldrh	r2, [r2, #0]\n",
+  "  4c:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  50:	f2c0 0c10 	movt	ip, #16\n",
+  "  54:	4464      	add	r4, ip\n",
+  "  56:	8824      	ldrh	r4, [r4, #0]\n",
+  "  58:	e9d4 2303 	ldrd	r2, r3, [r4, #12]\n",
+  "  5c:	e9d4 23ff 	ldrd	r2, r3, [r4, #1020]	; 0x3fc\n",
+  "  60:	f504 6280 	add.w	r2, r4, #1024	; 0x400\n",
+  "  64:	e9d2 2300 	ldrd	r2, r3, [r2]\n",
+  "  68:	f504 2280 	add.w	r2, r4, #262144	; 0x40000\n",
+  "  6c:	e9d2 2329 	ldrd	r2, r3, [r2, #164];	0xa4\n",
+  "  70:	f240 4200 	movw	r2, #1024	; 0x400\n",
+  "  74:	f2c0 0204 	movt	r2, #4\n",
+  "  78:	4422      	add	r2, r4\n",
+  "  7a:	e9d2 2300 	ldrd	r2, r3, [r2]\n",
+  "  7e:	f240 4c00 	movw	ip, #1024	; 0x400\n",
+  "  82:	f2c0 0c04 	movt	ip, #4\n",
+  "  86:	4464      	add	r4, ip\n",
+  "  88:	e9d4 4500 	ldrd	r4, r5, [r4]\n",
+  "  8c:	f8dc 000c 	ldr.w	r0, [ip, #12]\n",
+  "  90:	f5a4 1280 	sub.w	r2, r4, #1048576	; 0x100000\n",
+  "  94:	f8d2 20a4 	ldr.w	r2, [r2, #164]	; 0xa4\n",
+  "  98:	f994 200c 	ldrsb.w	r2, [r4, #12]\n",
+  "  9c:	7b22      	ldrb	r2, [r4, #12]\n",
+  "  9e:	f9b4 200c 	ldrsh.w	r2, [r4, #12]\n",
   nullptr
 };
 const char* StoreToOffsetResults[] = {
   "   0:	60e2      	str	r2, [r4, #12]\n",
-  "   2:	f44f 5c00 	mov.w	ip, #8192	; 0x2000\n",
-  "   6:	44a4      	add	ip, r4\n",
-  "   8:	f8cc 2000 	str.w	r2, [ip]\n",
-  "   c:	f8cc 000c 	str.w	r0, [ip, #12]\n",
-  "   10:	f8ac 000c 	strh.w	r0, [ip, #12]\n",
-  "   14:	f88c 200c 	strb.w	r2, [ip, #12]\n",
+  "   2:	f8c4 2fff 	str.w	r2, [r4, #4095]	; 0xfff\n",
+  "   6:	f504 5c80 	add.w	ip, r4, #4096	; 0x1000\n",
+  "   a:	f8cc 2000 	str.w	r2, [ip]\n",
+  "   e:	f504 1c80 	add.w	ip, r4, #1048576	; 0x100000\n",
+  "  12:	f8cc 20a4 	str.w	r2, [ip, #164]	; 0xa4\n",
+  "  16:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  1a:	f2c0 0c10 	movt	ip, #16\n",
+  "  1e:	44a4      	add	ip, r4\n",
+  "  20:	f8cc 2000 	str.w	r2, [ip]\n",
+  "  24:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  28:	f2c0 0c10 	movt	ip, #16\n",
+  "  2c:	44a4      	add	ip, r4\n",
+  "  2e:	f8cc 4000 	str.w	r4, [ip]\n",
+  "  32:	81a2      	strh	r2, [r4, #12]\n",
+  "  34:	f8a4 2fff 	strh.w	r2, [r4, #4095]	; 0xfff\n",
+  "  38:	f504 5c80 	add.w	ip, r4, #4096	; 0x1000\n",
+  "  3c:	f8ac 2000 	strh.w	r2, [ip]\n",
+  "  40:	f504 1c80 	add.w	ip, r4, #1048576	; 0x100000\n",
+  "  44:	f8ac 20a4 	strh.w	r2, [ip, #164]	; 0xa4\n",
+  "  48:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  4c:	f2c0 0c10 	movt	ip, #16\n",
+  "  50:	44a4      	add	ip, r4\n",
+  "  52:	f8ac 2000 	strh.w	r2, [ip]\n",
+  "  56:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  5a:	f2c0 0c10 	movt	ip, #16\n",
+  "  5e:	44a4      	add	ip, r4\n",
+  "  60:	f8ac 4000 	strh.w	r4, [ip]\n",
+  "  64:	e9c4 2303 	strd	r2, r3, [r4, #12]\n",
+  "  68:	e9c4 23ff 	strd	r2, r3, [r4, #1020]	; 0x3fc\n",
+  "  6c:	f504 6c80 	add.w	ip, r4, #1024	; 0x400\n",
+  "  70:	e9cc 2300 	strd	r2, r3, [ip]\n",
+  "  74:	f504 2c80 	add.w	ip, r4, #262144	; 0x40000\n",
+  "  78:	e9cc 2329 	strd	r2, r3, [ip, #164];	0xa4\n",
+  "  7c:	f240 4c00 	movw	ip, #1024	; 0x400\n",
+  "  80:	f2c0 0c04 	movt	ip, #4\n",
+  "  84:	44a4      	add	ip, r4\n",
+  "  86:	e9cc 2300 	strd	r2, r3, [ip]\n",
+  "  8a:	f240 4c00 	movw	ip, #1024	; 0x400\n",
+  "  8e:	f2c0 0c04 	movt	ip, #4\n",
+  "  92:	44a4      	add	ip, r4\n",
+  "  94:	e9cc 4500 	strd	r4, r5, [ip]\n",
+  "  98:	f8cc 000c 	str.w	r0, [ip, #12]\n",
+  "  9c:	f5a4 1c80 	sub.w	ip, r4, #1048576	; 0x100000\n",
+  "  a0:	f8cc 20a4 	str.w	r2, [ip, #164]	; 0xa4\n",
+  "  a4:	7322      	strb	r2, [r4, #12]\n",
   nullptr
 };
 const char* IfThenResults[] = {
@@ -4942,6 +5052,324 @@
   nullptr
 };
 
+const char* AddConstantResults[] = {
+  "   0:	4608      	mov	r0, r1\n",
+  "   2:	1c48      	adds	r0, r1, #1\n",
+  "   4:	1dc8      	adds	r0, r1, #7\n",
+  "   6:	f101 0008 	add.w	r0, r1, #8\n",
+  "   a:	f101 00ff 	add.w	r0, r1, #255	; 0xff\n",
+  "   e:	f501 7080 	add.w	r0, r1, #256	; 0x100\n",
+  "  12:	f201 1001 	addw	r0, r1, #257	; 0x101\n",
+  "  16:	f601 70ff 	addw	r0, r1, #4095	; 0xfff\n",
+  "  1a:	f501 5080 	add.w	r0, r1, #4096	; 0x1000\n",
+  "  1e:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  "  22:	1a08      	subs	r0, r1, r0\n",
+  "  24:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  "  28:	1808      	adds	r0, r1, r0\n",
+  "  2a:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  "  2e:	1808      	adds	r0, r1, r0\n",
+  "  30:	f501 3080 	add.w	r0, r1, #65536	; 0x10000\n",
+  "  34:	f101 1001 	add.w	r0, r1, #65537	; 0x10001\n",
+  "  38:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  "  3c:	1a08      	subs	r0, r1, r0\n",
+  "  3e:	f240 0003 	movw	r0, #3\n",
+  "  42:	f2c0 0001 	movt	r0, #1\n",
+  "  46:	1808      	adds	r0, r1, r0\n",
+  "  48:	1e48      	subs	r0, r1, #1\n",
+  "  4a:	1fc8      	subs	r0, r1, #7\n",
+  "  4c:	f1a1 0008 	sub.w	r0, r1, #8\n",
+  "  50:	f1a1 00ff 	sub.w	r0, r1, #255	; 0xff\n",
+  "  54:	f5a1 7080 	sub.w	r0, r1, #256	; 0x100\n",
+  "  58:	f2a1 1001 	subw	r0, r1, #257	; 0x101\n",
+  "  5c:	f6a1 70ff 	subw	r0, r1, #4095	; 0xfff\n",
+  "  60:	f5a1 5080 	sub.w	r0, r1, #4096	; 0x1000\n",
+  "  64:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  "  68:	1808      	adds	r0, r1, r0\n",
+  "  6a:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  "  6e:	1a08      	subs	r0, r1, r0\n",
+  "  70:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  "  74:	1a08      	subs	r0, r1, r0\n",
+  "  76:	f5a1 3080 	sub.w	r0, r1, #65536	; 0x10000\n",
+  "  7a:	f1a1 1001 	sub.w	r0, r1, #65537	; 0x10001\n",
+  "  7e:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  "  82:	1808      	adds	r0, r1, r0\n",
+  "  84:	f64f 70fd 	movw	r0, #65533	; 0xfffd\n",
+  "  88:	f6cf 70fe 	movt	r0, #65534	; 0xfffe\n",
+  "  8c:	1808      	adds	r0, r1, r0\n",
+  "  8e:	3101      	adds	r1, #1\n",
+  "  90:	3007      	adds	r0, #7\n",
+  "  92:	3108      	adds	r1, #8\n",
+  "  94:	30ff      	adds	r0, #255	; 0xff\n",
+  "  96:	f501 7180 	add.w	r1, r1, #256	; 0x100\n",
+  "  9a:	f200 1001 	addw	r0, r0, #257	; 0x101\n",
+  "  9e:	f601 71ff 	addw	r1, r1, #4095	; 0xfff\n",
+  "  a2:	f500 5080 	add.w	r0, r0, #4096	; 0x1000\n",
+  "  a6:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  "  aa:	eba1 010c 	sub.w	r1, r1, ip\n",
+  "  ae:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  "  b2:	4460      	add	r0, ip\n",
+  "  b4:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  "  b8:	4461      	add	r1, ip\n",
+  "  ba:	f500 3080 	add.w	r0, r0, #65536	; 0x10000\n",
+  "  be:	f101 1101 	add.w	r1, r1, #65537	; 0x10001\n",
+  "  c2:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  "  c6:	eba0 000c 	sub.w	r0, r0, ip\n",
+  "  ca:	f240 0c03 	movw	ip, #3\n",
+  "  ce:	f2c0 0c01 	movt	ip, #1\n",
+  "  d2:	4461      	add	r1, ip\n",
+  "  d4:	3801      	subs	r0, #1\n",
+  "  d6:	3907      	subs	r1, #7\n",
+  "  d8:	3808      	subs	r0, #8\n",
+  "  da:	39ff      	subs	r1, #255	; 0xff\n",
+  "  dc:	f5a0 7080 	sub.w	r0, r0, #256	; 0x100\n",
+  "  e0:	f2a1 1101 	subw	r1, r1, #257	; 0x101\n",
+  "  e4:	f6a0 70ff 	subw	r0, r0, #4095	; 0xfff\n",
+  "  e8:	f5a1 5180 	sub.w	r1, r1, #4096	; 0x1000\n",
+  "  ec:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  "  f0:	4460      	add	r0, ip\n",
+  "  f2:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  "  f6:	eba1 010c 	sub.w	r1, r1, ip\n",
+  "  fa:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  "  fe:	eba0 000c 	sub.w	r0, r0, ip\n",
+  " 102:	f5a1 3180 	sub.w	r1, r1, #65536	; 0x10000\n",
+  " 106:	f1a0 1001 	sub.w	r0, r0, #65537	; 0x10001\n",
+  " 10a:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  " 10e:	4461      	add	r1, ip\n",
+  " 110:	f64f 7cfd 	movw	ip, #65533	; 0xfffd\n",
+  " 114:	f6cf 7cfe 	movt	ip, #65534	; 0xfffe\n",
+  " 118:	4460      	add	r0, ip\n",
+  " 11a:	f101 0801 	add.w	r8, r1, #1\n",
+  " 11e:	f108 0007 	add.w	r0, r8, #7\n",
+  " 122:	f108 0808 	add.w	r8, r8, #8\n",
+  " 126:	f101 08ff 	add.w	r8, r1, #255	; 0xff\n",
+  " 12a:	f508 7080 	add.w	r0, r8, #256	; 0x100\n",
+  " 12e:	f208 1801 	addw	r8, r8, #257	; 0x101\n",
+  " 132:	f601 78ff 	addw	r8, r1, #4095	; 0xfff\n",
+  " 136:	f508 5080 	add.w	r0, r8, #4096	; 0x1000\n",
+  " 13a:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  " 13e:	eba8 080c 	sub.w	r8, r8, ip\n",
+  " 142:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 146:	1808      	adds	r0, r1, r0\n",
+  " 148:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  " 14c:	eb08 0000 	add.w	r0, r8, r0\n",
+  " 150:	f508 3880 	add.w	r8, r8, #65536	; 0x10000\n",
+  " 154:	f101 1801 	add.w	r8, r1, #65537	; 0x10001\n",
+  " 158:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  " 15c:	eba8 0000 	sub.w	r0, r8, r0\n",
+  " 160:	f240 0003 	movw	r0, #3\n",
+  " 164:	f2c0 0001 	movt	r0, #1\n",
+  " 168:	eb08 0000 	add.w	r0, r8, r0\n",
+  " 16c:	f108 38ff 	add.w	r8, r8, #4294967295	; 0xffffffff\n",
+  " 170:	f1a1 0807 	sub.w	r8, r1, #7\n",
+  " 174:	f1a8 0008 	sub.w	r0, r8, #8\n",
+  " 178:	f1a8 08ff 	sub.w	r8, r8, #255	; 0xff\n",
+  " 17c:	f5a1 7880 	sub.w	r8, r1, #256	; 0x100\n",
+  " 180:	f2a8 1001 	subw	r0, r8, #257	; 0x101\n",
+  " 184:	f6a8 78ff 	subw	r8, r8, #4095	; 0xfff\n",
+  " 188:	f5a1 5880 	sub.w	r8, r1, #4096	; 0x1000\n",
+  " 18c:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  " 190:	eb08 0000 	add.w	r0, r8, r0\n",
+  " 194:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 198:	1a08      	subs	r0, r1, r0\n",
+  " 19a:	f64f 78ff 	movw	r8, #65535	; 0xffff\n",
+  " 19e:	eba1 0808 	sub.w	r8, r1, r8\n",
+  " 1a2:	f5a8 3080 	sub.w	r0, r8, #65536	; 0x10000\n",
+  " 1a6:	f1a8 1801 	sub.w	r8, r8, #65537	; 0x10001\n",
+  " 1aa:	f06f 1801 	mvn.w	r8, #65537	; 0x10001\n",
+  " 1ae:	eb01 0808 	add.w	r8, r1, r8\n",
+  " 1b2:	f64f 70fd 	movw	r0, #65533	; 0xfffd\n",
+  " 1b6:	f6cf 70fe 	movt	r0, #65534	; 0xfffe\n",
+  " 1ba:	eb08 0000 	add.w	r0, r8, r0\n",
+  " 1be:	4608      	mov	r0, r1\n",
+  " 1c0:	f101 0001 	add.w	r0, r1, #1\n",
+  " 1c4:	f101 0007 	add.w	r0, r1, #7\n",
+  " 1c8:	f101 0008 	add.w	r0, r1, #8\n",
+  " 1cc:	f101 00ff 	add.w	r0, r1, #255	; 0xff\n",
+  " 1d0:	f501 7080 	add.w	r0, r1, #256	; 0x100\n",
+  " 1d4:	f201 1001 	addw	r0, r1, #257	; 0x101\n",
+  " 1d8:	f601 70ff 	addw	r0, r1, #4095	; 0xfff\n",
+  " 1dc:	f501 5080 	add.w	r0, r1, #4096	; 0x1000\n",
+  " 1e0:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  " 1e4:	eba1 0000 	sub.w	r0, r1, r0\n",
+  " 1e8:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 1ec:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 1f0:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  " 1f4:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 1f8:	f501 3080 	add.w	r0, r1, #65536	; 0x10000\n",
+  " 1fc:	f101 1001 	add.w	r0, r1, #65537	; 0x10001\n",
+  " 200:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  " 204:	eba1 0000 	sub.w	r0, r1, r0\n",
+  " 208:	f240 0003 	movw	r0, #3\n",
+  " 20c:	f2c0 0001 	movt	r0, #1\n",
+  " 210:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 214:	f101 30ff 	add.w	r0, r1, #4294967295	; 0xffffffff\n",
+  " 218:	f1a1 0007 	sub.w	r0, r1, #7\n",
+  " 21c:	f1a1 0008 	sub.w	r0, r1, #8\n",
+  " 220:	f1a1 00ff 	sub.w	r0, r1, #255	; 0xff\n",
+  " 224:	f5a1 7080 	sub.w	r0, r1, #256	; 0x100\n",
+  " 228:	f2a1 1001 	subw	r0, r1, #257	; 0x101\n",
+  " 22c:	f6a1 70ff 	subw	r0, r1, #4095	; 0xfff\n",
+  " 230:	f5a1 5080 	sub.w	r0, r1, #4096	; 0x1000\n",
+  " 234:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  " 238:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 23c:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 240:	eba1 0000 	sub.w	r0, r1, r0\n",
+  " 244:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  " 248:	eba1 0000 	sub.w	r0, r1, r0\n",
+  " 24c:	f5a1 3080 	sub.w	r0, r1, #65536	; 0x10000\n",
+  " 250:	f1a1 1001 	sub.w	r0, r1, #65537	; 0x10001\n",
+  " 254:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  " 258:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 25c:	f64f 70fd 	movw	r0, #65533	; 0xfffd\n",
+  " 260:	f6cf 70fe 	movt	r0, #65534	; 0xfffe\n",
+  " 264:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 268:	f101 0101 	add.w	r1, r1, #1\n",
+  " 26c:	f100 0007 	add.w	r0, r0, #7\n",
+  " 270:	f101 0108 	add.w	r1, r1, #8\n",
+  " 274:	f100 00ff 	add.w	r0, r0, #255	; 0xff\n",
+  " 278:	f501 7180 	add.w	r1, r1, #256	; 0x100\n",
+  " 27c:	f200 1001 	addw	r0, r0, #257	; 0x101\n",
+  " 280:	f601 71ff 	addw	r1, r1, #4095	; 0xfff\n",
+  " 284:	f500 5080 	add.w	r0, r0, #4096	; 0x1000\n",
+  " 288:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  " 28c:	eba1 010c 	sub.w	r1, r1, ip\n",
+  " 290:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  " 294:	4460      	add	r0, ip\n",
+  " 296:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  " 29a:	4461      	add	r1, ip\n",
+  " 29c:	f500 3080 	add.w	r0, r0, #65536	; 0x10000\n",
+  " 2a0:	f101 1101 	add.w	r1, r1, #65537	; 0x10001\n",
+  " 2a4:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  " 2a8:	eba0 000c 	sub.w	r0, r0, ip\n",
+  " 2ac:	f240 0c03 	movw	ip, #3\n",
+  " 2b0:	f2c0 0c01 	movt	ip, #1\n",
+  " 2b4:	4461      	add	r1, ip\n",
+  " 2b6:	f100 30ff 	add.w	r0, r0, #4294967295	; 0xffffffff\n",
+  " 2ba:	f1a1 0107 	sub.w	r1, r1, #7\n",
+  " 2be:	f1a0 0008 	sub.w	r0, r0, #8\n",
+  " 2c2:	f1a1 01ff 	sub.w	r1, r1, #255	; 0xff\n",
+  " 2c6:	f5a0 7080 	sub.w	r0, r0, #256	; 0x100\n",
+  " 2ca:	f2a1 1101 	subw	r1, r1, #257	; 0x101\n",
+  " 2ce:	f6a0 70ff 	subw	r0, r0, #4095	; 0xfff\n",
+  " 2d2:	f5a1 5180 	sub.w	r1, r1, #4096	; 0x1000\n",
+  " 2d6:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  " 2da:	4460      	add	r0, ip\n",
+  " 2dc:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  " 2e0:	eba1 010c 	sub.w	r1, r1, ip\n",
+  " 2e4:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  " 2e8:	eba0 000c 	sub.w	r0, r0, ip\n",
+  " 2ec:	f5a1 3180 	sub.w	r1, r1, #65536	; 0x10000\n",
+  " 2f0:	f1a0 1001 	sub.w	r0, r0, #65537	; 0x10001\n",
+  " 2f4:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  " 2f8:	4461      	add	r1, ip\n",
+  " 2fa:	f64f 7cfd 	movw	ip, #65533	; 0xfffd\n",
+  " 2fe:	f6cf 7cfe 	movt	ip, #65534	; 0xfffe\n",
+  " 302:	4460      	add	r0, ip\n",
+  " 304:	1c08      	adds	r0, r1, #0\n",
+  " 306:	1c48      	adds	r0, r1, #1\n",
+  " 308:	1dc8      	adds	r0, r1, #7\n",
+  " 30a:	f111 0008 	adds.w	r0, r1, #8\n",
+  " 30e:	f111 00ff 	adds.w	r0, r1, #255	; 0xff\n",
+  " 312:	f511 7080 	adds.w	r0, r1, #256	; 0x100\n",
+  " 316:	f46f 7080 	mvn.w	r0, #256	; 0x100\n",
+  " 31a:	1a08      	subs	r0, r1, r0\n",
+  " 31c:	f640 70ff 	movw	r0, #4095	; 0xfff\n",
+  " 320:	1808      	adds	r0, r1, r0\n",
+  " 322:	f511 5080 	adds.w	r0, r1, #4096	; 0x1000\n",
+  " 326:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  " 32a:	1a08      	subs	r0, r1, r0\n",
+  " 32c:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 330:	1808      	adds	r0, r1, r0\n",
+  " 332:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  " 336:	1808      	adds	r0, r1, r0\n",
+  " 338:	f511 3080 	adds.w	r0, r1, #65536	; 0x10000\n",
+  " 33c:	f111 1001 	adds.w	r0, r1, #65537	; 0x10001\n",
+  " 340:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  " 344:	1a08      	subs	r0, r1, r0\n",
+  " 346:	f240 0003 	movw	r0, #3\n",
+  " 34a:	f2c0 0001 	movt	r0, #1\n",
+  " 34e:	1808      	adds	r0, r1, r0\n",
+  " 350:	1e48      	subs	r0, r1, #1\n",
+  " 352:	1fc8      	subs	r0, r1, #7\n",
+  " 354:	f1b1 0008 	subs.w	r0, r1, #8\n",
+  " 358:	f1b1 00ff 	subs.w	r0, r1, #255	; 0xff\n",
+  " 35c:	f5b1 7080 	subs.w	r0, r1, #256	; 0x100\n",
+  " 360:	f46f 7080 	mvn.w	r0, #256	; 0x100\n",
+  " 364:	1808      	adds	r0, r1, r0\n",
+  " 366:	f640 70ff 	movw	r0, #4095	; 0xfff\n",
+  " 36a:	1a08      	subs	r0, r1, r0\n",
+  " 36c:	f5b1 5080 	subs.w	r0, r1, #4096	; 0x1000\n",
+  " 370:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  " 374:	1808      	adds	r0, r1, r0\n",
+  " 376:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 37a:	1a08      	subs	r0, r1, r0\n",
+  " 37c:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  " 380:	1a08      	subs	r0, r1, r0\n",
+  " 382:	f5b1 3080 	subs.w	r0, r1, #65536	; 0x10000\n",
+  " 386:	f1b1 1001 	subs.w	r0, r1, #65537	; 0x10001\n",
+  " 38a:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  " 38e:	1808      	adds	r0, r1, r0\n",
+  " 390:	f64f 70fd 	movw	r0, #65533	; 0xfffd\n",
+  " 394:	f6cf 70fe 	movt	r0, #65534	; 0xfffe\n",
+  " 398:	1808      	adds	r0, r1, r0\n",
+  " 39a:	3000      	adds	r0, #0\n",
+  " 39c:	3101      	adds	r1, #1\n",
+  " 39e:	3007      	adds	r0, #7\n",
+  " 3a0:	3108      	adds	r1, #8\n",
+  " 3a2:	30ff      	adds	r0, #255	; 0xff\n",
+  " 3a4:	f511 7180 	adds.w	r1, r1, #256	; 0x100\n",
+  " 3a8:	f46f 7c80 	mvn.w	ip, #256	; 0x100\n",
+  " 3ac:	ebb0 000c 	subs.w	r0, r0, ip\n",
+  " 3b0:	f640 7cff 	movw	ip, #4095	; 0xfff\n",
+  " 3b4:	eb11 010c 	adds.w	r1, r1, ip\n",
+  " 3b8:	f510 5080 	adds.w	r0, r0, #4096	; 0x1000\n",
+  " 3bc:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  " 3c0:	ebb1 010c 	subs.w	r1, r1, ip\n",
+  " 3c4:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  " 3c8:	eb10 000c 	adds.w	r0, r0, ip\n",
+  " 3cc:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  " 3d0:	eb11 010c 	adds.w	r1, r1, ip\n",
+  " 3d4:	f510 3080 	adds.w	r0, r0, #65536	; 0x10000\n",
+  " 3d8:	f111 1101 	adds.w	r1, r1, #65537	; 0x10001\n",
+  " 3dc:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  " 3e0:	ebb0 000c 	subs.w	r0, r0, ip\n",
+  " 3e4:	f240 0c03 	movw	ip, #3\n",
+  " 3e8:	f2c0 0c01 	movt	ip, #1\n",
+  " 3ec:	eb11 010c 	adds.w	r1, r1, ip\n",
+  " 3f0:	3801      	subs	r0, #1\n",
+  " 3f2:	3907      	subs	r1, #7\n",
+  " 3f4:	3808      	subs	r0, #8\n",
+  " 3f6:	39ff      	subs	r1, #255	; 0xff\n",
+  " 3f8:	f5b0 7080 	subs.w	r0, r0, #256	; 0x100\n",
+  " 3fc:	f46f 7c80 	mvn.w	ip, #256	; 0x100\n",
+  " 400:	eb11 010c 	adds.w	r1, r1, ip\n",
+  " 404:	f640 7cff 	movw	ip, #4095	; 0xfff\n",
+  " 408:	ebb0 000c 	subs.w	r0, r0, ip\n",
+  " 40c:	f5b1 5180 	subs.w	r1, r1, #4096	; 0x1000\n",
+  " 410:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  " 414:	eb10 000c 	adds.w	r0, r0, ip\n",
+  " 418:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  " 41c:	ebb1 010c 	subs.w	r1, r1, ip\n",
+  " 420:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  " 424:	ebb0 000c 	subs.w	r0, r0, ip\n",
+  " 428:	f5b1 3180 	subs.w	r1, r1, #65536	; 0x10000\n",
+  " 42c:	f1b0 1001 	subs.w	r0, r0, #65537	; 0x10001\n",
+  " 430:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  " 434:	eb11 010c 	adds.w	r1, r1, ip\n",
+  " 438:	f64f 7cfd 	movw	ip, #65533	; 0xfffd\n",
+  " 43c:	f6cf 7cfe 	movt	ip, #65534	; 0xfffe\n",
+  " 440:	eb10 000c 	adds.w	r0, r0, ip\n",
+  " 444:	bf08      	it	eq\n",
+  " 446:	f111 0001 	addseq.w	r0, r1, #1\n",
+  " 44a:	bf18      	it	ne\n",
+  " 44c:	1c48      	addne	r0, r1, #1\n",
+  " 44e:	bfa8      	it	ge\n",
+  " 450:	f110 0001 	addsge.w	r0, r0, #1\n",
+  " 454:	bfd8      	it	le\n",
+  " 456:	3001      	addle	r0, #1\n",
+  nullptr
+};
+
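The mvn.w/subs pairs in the listing above rely on a two's-complement identity: when a constant c is not directly encodable but c - 1 is a valid modified immediate, the assembler materializes ~(c - 1) == -c and subtracts it. The pair at offsets 1e/22, for example, computes r1 + 4097 via mvn of 4096. A minimal standalone check of that identity (plain C++, nothing from the test harness):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t r1 = 0x12345678u;   // arbitrary register value
  uint32_t c = 4097u;          // c itself is not encodable, but c - 1 (4096) is
  uint32_t r0 = ~(c - 1u);     // what "mvn.w r0, #4096" leaves in r0
  assert(r1 - r0 == r1 + c);   // what "subs r0, r1, r0" then computes
  return 0;
}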
 std::map<std::string, const char* const*> test_results;
 void setup_results() {
     test_results["SimpleMov"] = SimpleMovResults;
@@ -4952,6 +5380,7 @@
     test_results["DataProcessingModifiedImmediate"] = DataProcessingModifiedImmediateResults;
     test_results["DataProcessingModifiedImmediates"] = DataProcessingModifiedImmediatesResults;
     test_results["DataProcessingShiftedRegister"] = DataProcessingShiftedRegisterResults;
+    test_results["ShiftImmediate"] = ShiftImmediateResults;
     test_results["BasicLoad"] = BasicLoadResults;
     test_results["BasicStore"] = BasicStoreResults;
     test_results["ComplexLoad"] = ComplexLoadResults;
@@ -4966,6 +5395,7 @@
     test_results["StoreMultiple"] = StoreMultipleResults;
     test_results["MovWMovT"] = MovWMovTResults;
     test_results["SpecialAddSub"] = SpecialAddSubResults;
+    test_results["LoadFromOffset"] = LoadFromOffsetResults;
     test_results["StoreToOffset"] = StoreToOffsetResults;
     test_results["IfThen"] = IfThenResults;
     test_results["CbzCbnz"] = CbzCbnzResults;
@@ -4990,4 +5420,5 @@
     test_results["LoadStoreLiteral"] = LoadStoreLiteralResults;
     test_results["LoadStoreLimits"] = LoadStoreLimitsResults;
     test_results["CompareAndBranch"] = CompareAndBranchResults;
+    test_results["AddConstant"] = AddConstantResults;
 }
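Every results array registered above is nullptr-terminated, so a consumer can walk it without a separate length. A hypothetical lookup in that style (illustrative only; the real driver lives elsewhere in the test harness):

#include <cstddef>
#include <map>
#include <string>

extern std::map<std::string, const char* const*> test_results;  // defined above

size_t CountExpectedLines(const std::string& test_name) {
  const char* const* expected = test_results[test_name];  // e.g. "AddConstant"
  size_t n = 0;
  while (expected[n] != nullptr) {
    ++n;  // arrays are nullptr-terminated
  }
  return n;
}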
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index aee6412..afca8ad 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -249,6 +249,11 @@
   EmitR(0, rs, rt, rd, 2, 0x18);
 }
 
+void MipsAssembler::MuhR6(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 3, 0x18);
+}
+
 void MipsAssembler::MuhuR6(Register rd, Register rs, Register rt) {
   CHECK(IsR6());
   EmitR(0, rs, rt, rd, 3, 0x19);
@@ -302,6 +307,46 @@
   EmitR(0, rs, rt, rd, 0, 0x27);
 }
 
+void MipsAssembler::Movz(Register rd, Register rs, Register rt) {
+  CHECK(!IsR6());
+  EmitR(0, rs, rt, rd, 0, 0x0A);
+}
+
+void MipsAssembler::Movn(Register rd, Register rs, Register rt) {
+  CHECK(!IsR6());
+  EmitR(0, rs, rt, rd, 0, 0x0B);
+}
+
+void MipsAssembler::Seleqz(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 0, 0x35);
+}
+
+void MipsAssembler::Selnez(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 0, 0x37);
+}
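Unlike movz/movn, which conditionally leave rd untouched, the R6 selects always write rd (either rs or zero), so a full two-way select takes both instructions plus an OR. A plain-C++ model of that idiom:

#include <cassert>
#include <cstdint>

// seleqz rd, rs, rt: rd = (rt == 0) ? rs : 0.
static uint32_t Seleqz(uint32_t rs, uint32_t rt) { return rt == 0 ? rs : 0; }
// selnez rd, rs, rt: rd = (rt != 0) ? rs : 0.
static uint32_t Selnez(uint32_t rs, uint32_t rt) { return rt != 0 ? rs : 0; }

int main() {
  uint32_t a = 111, b = 222;
  for (uint32_t cond : {0u, 1u, 42u}) {
    // select = cond ? a : b, built from the two R6 primitives.
    uint32_t select = Selnez(a, cond) | Seleqz(b, cond);
    assert(select == (cond ? a : b));
  }
  return 0;
}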
+
+void MipsAssembler::ClzR6(Register rd, Register rs) {
+  CHECK(IsR6());
+  EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x10);
+}
+
+void MipsAssembler::ClzR2(Register rd, Register rs) {
+  CHECK(!IsR6());
+  EmitR(0x1C, rs, rd, rd, 0, 0x20);
+}
+
+void MipsAssembler::CloR6(Register rd, Register rs) {
+  CHECK(IsR6());
+  EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x11);
+}
+
+void MipsAssembler::CloR2(Register rd, Register rs) {
+  CHECK(!IsR6());
+  EmitR(0x1C, rs, rd, rd, 0, 0x21);
+}
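All of these emitters funnel into EmitR, which packs the standard MIPS R-type fields: opcode(6) | rs(5) | rt(5) | rd(5) | shamt(5) | funct(6). A standalone re-derivation of the word EmitR would produce for "movz $v0, $a0, $a1" (register numbers v0=2, a0=4, a1=5 per the MIPS ABI):

#include <cassert>
#include <cstdint>

static uint32_t EncodeR(uint32_t op, uint32_t rs, uint32_t rt,
                        uint32_t rd, uint32_t shamt, uint32_t funct) {
  return op << 26 | rs << 21 | rt << 16 | rd << 11 | shamt << 6 | funct;
}

int main() {
  // movz $v0, $a0, $a1 == EmitR(0, a0, a1, v0, 0, 0x0A).
  assert(EncodeR(0, 4, 5, 2, 0, 0x0A) == 0x0085100Au);
  return 0;
}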
+
 void MipsAssembler::Seb(Register rd, Register rt) {
   EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x10, 0x20);
 }
@@ -310,15 +355,32 @@
   EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20);
 }
 
+void MipsAssembler::Wsbh(Register rd, Register rt) {
+  EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20);
+}
+
+void MipsAssembler::Bitswap(Register rd, Register rt) {
+  CHECK(IsR6());
+  EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x0, 0x20);
+}
+
 void MipsAssembler::Sll(Register rd, Register rt, int shamt) {
+  CHECK(IsUint<5>(shamt)) << shamt;
   EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00);
 }
 
 void MipsAssembler::Srl(Register rd, Register rt, int shamt) {
+  CHECK(IsUint<5>(shamt)) << shamt;
   EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02);
 }
 
+void MipsAssembler::Rotr(Register rd, Register rt, int shamt) {
+  CHECK(IsUint<5>(shamt)) << shamt;
+  EmitR(0, static_cast<Register>(1), rt, rd, shamt, 0x02);
+}
+
 void MipsAssembler::Sra(Register rd, Register rt, int shamt) {
+  CHECK(IsUint<5>(shamt)) << shamt;
   EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03);
 }
 
@@ -330,6 +392,10 @@
   EmitR(0, rs, rt, rd, 0, 0x06);
 }
 
+void MipsAssembler::Rotrv(Register rd, Register rt, Register rs) {
+  EmitR(0, rs, rt, rd, 1, 0x06);
+}
+
 void MipsAssembler::Srav(Register rd, Register rt, Register rs) {
   EmitR(0, rs, rt, rd, 0, 0x07);
 }
@@ -437,6 +503,18 @@
   EmitI(0x7, rt, static_cast<Register>(0), imm16);
 }
 
+void MipsAssembler::Bc1f(int cc, uint16_t imm16) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>(cc << 2), imm16);
+}
+
+void MipsAssembler::Bc1t(int cc, uint16_t imm16) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>((cc << 2) | 1), imm16);
+}
+
 void MipsAssembler::J(uint32_t addr26) {
   EmitI26(0x2, addr26);
 }
@@ -571,7 +649,17 @@
   EmitI21(0x3E, rs, imm21);
 }
 
-void MipsAssembler::EmitBcond(BranchCondition cond, Register rs, Register rt, uint16_t imm16) {
+void MipsAssembler::Bc1eqz(FRegister ft, uint16_t imm16) {
+  CHECK(IsR6());
+  EmitFI(0x11, 0x9, ft, imm16);
+}
+
+void MipsAssembler::Bc1nez(FRegister ft, uint16_t imm16) {
+  CHECK(IsR6());
+  EmitFI(0x11, 0xD, ft, imm16);
+}
+
+void MipsAssembler::EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16) {
   switch (cond) {
     case kCondLTZ:
       CHECK_EQ(rt, ZERO);
@@ -603,6 +691,14 @@
       CHECK_EQ(rt, ZERO);
       Bnez(rs, imm16);
       break;
+    case kCondF:
+      CHECK_EQ(rt, ZERO);
+      Bc1f(static_cast<int>(rs), imm16);
+      break;
+    case kCondT:
+      CHECK_EQ(rt, ZERO);
+      Bc1t(static_cast<int>(rs), imm16);
+      break;
     case kCondLT:
     case kCondGE:
     case kCondLE:
@@ -617,7 +713,7 @@
   }
 }
 
-void MipsAssembler::EmitBcondc(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21) {
+void MipsAssembler::EmitBcondR6(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21) {
   switch (cond) {
     case kCondLT:
       Bltc(rs, rt, imm16_21);
@@ -667,6 +763,14 @@
     case kCondGEU:
       Bgeuc(rs, rt, imm16_21);
       break;
+    case kCondF:
+      CHECK_EQ(rt, ZERO);
+      Bc1eqz(static_cast<FRegister>(rs), imm16_21);
+      break;
+    case kCondT:
+      CHECK_EQ(rt, ZERO);
+      Bc1nez(static_cast<FRegister>(rs), imm16_21);
+      break;
     case kUncond:
       LOG(FATAL) << "Unexpected branch condition " << cond;
       UNREACHABLE();
@@ -721,6 +825,202 @@
   EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x7);
 }
 
+void MipsAssembler::CunS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x31);
+}
+
+void MipsAssembler::CeqS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x32);
+}
+
+void MipsAssembler::CueqS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x33);
+}
+
+void MipsAssembler::ColtS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x34);
+}
+
+void MipsAssembler::CultS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x35);
+}
+
+void MipsAssembler::ColeS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x36);
+}
+
+void MipsAssembler::CuleS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x37);
+}
+
+void MipsAssembler::CunD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x31);
+}
+
+void MipsAssembler::CeqD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x32);
+}
+
+void MipsAssembler::CueqD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x33);
+}
+
+void MipsAssembler::ColtD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x34);
+}
+
+void MipsAssembler::CultD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x35);
+}
+
+void MipsAssembler::ColeD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x36);
+}
+
+void MipsAssembler::CuleD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x37);
+}
+
+void MipsAssembler::CmpUnS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x01);
+}
+
+void MipsAssembler::CmpEqS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x02);
+}
+
+void MipsAssembler::CmpUeqS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x03);
+}
+
+void MipsAssembler::CmpLtS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x04);
+}
+
+void MipsAssembler::CmpUltS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x05);
+}
+
+void MipsAssembler::CmpLeS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x06);
+}
+
+void MipsAssembler::CmpUleS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x07);
+}
+
+void MipsAssembler::CmpOrS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x11);
+}
+
+void MipsAssembler::CmpUneS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x12);
+}
+
+void MipsAssembler::CmpNeS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x13);
+}
+
+void MipsAssembler::CmpUnD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x01);
+}
+
+void MipsAssembler::CmpEqD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x02);
+}
+
+void MipsAssembler::CmpUeqD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x03);
+}
+
+void MipsAssembler::CmpLtD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x04);
+}
+
+void MipsAssembler::CmpUltD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x05);
+}
+
+void MipsAssembler::CmpLeD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x06);
+}
+
+void MipsAssembler::CmpUleD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x07);
+}
+
+void MipsAssembler::CmpOrD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x11);
+}
+
+void MipsAssembler::CmpUneD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x12);
+}
+
+void MipsAssembler::CmpNeD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x13);
+}
+
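The two comparison families above differ in where the result lands: the R2 c.cond.fmt instructions set one of the eight $fcc condition codes (consumed by bc1f/bc1t/movf/movt), while the R6 cmp.cond.fmt instructions write an all-ones or all-zeros mask into the destination FPR (consumed by bc1eqz/bc1nez). A plain-C++ model of the contrast:

#include <cassert>
#include <cstdint>

// R2: c.lt.s sets a 1-bit condition code.
static bool CLtS(float fs, float ft) { return fs < ft; }
// R6: cmp.lt.s writes an all-ones/all-zeros mask into an FPR.
static uint32_t CmpLtS(float fs, float ft) {
  return fs < ft ? 0xFFFFFFFFu : 0u;
}

int main() {
  assert(CLtS(1.0f, 2.0f));
  assert(CmpLtS(1.0f, 2.0f) == 0xFFFFFFFFu);
  assert(CmpLtS(2.0f, 1.0f) == 0u);
  return 0;
}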
+void MipsAssembler::Movf(Register rd, Register rs, int cc) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitR(0, rs, static_cast<Register>(cc << 2), rd, 0, 0x01);
+}
+
+void MipsAssembler::Movt(Register rd, Register rs, int cc) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01);
+}
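bc1f/bc1t and movf/movt all carry the condition-code index in one 5-bit register field: bits 4..2 hold the $fcc number and bit 0 selects the true/false sense, which is exactly the (cc << 2) | 1 arithmetic above (bit 1 is the "nd" branch-likely flag and stays 0 here). A quick check of the packing:

#include <cassert>
#include <cstdint>

static uint32_t CcField(int cc, bool true_sense) {
  return (static_cast<uint32_t>(cc) << 2) | (true_sense ? 1u : 0u);
}

int main() {
  assert(CcField(0, false) == 0x00);  // bc1f $fcc0 / movf ..., $fcc0
  assert(CcField(7, true)  == 0x1D);  // bc1t $fcc7 / movt ..., $fcc7
  return 0;
}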
+
 void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) {
   EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20);
 }
@@ -992,6 +1292,10 @@
       CHECK_NE(lhs_reg, ZERO);
       CHECK_EQ(rhs_reg, ZERO);
       break;
+    case kCondF:
+    case kCondT:
+      CHECK_EQ(rhs_reg, ZERO);
+      break;
     case kUncond:
       UNREACHABLE();
   }
@@ -1046,6 +1350,10 @@
       return kCondGEU;
     case kCondGEU:
       return kCondLTU;
+    case kCondF:
+      return kCondT;
+    case kCondT:
+      return kCondF;
     case kUncond:
       LOG(FATAL) << "Unexpected branch condition " << cond;
   }
@@ -1448,7 +1756,7 @@
       break;
     case Branch::kCondBranch:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      EmitBcond(condition, lhs, rhs, offset);
+      EmitBcondR2(condition, lhs, rhs, offset);
       Nop();  // TODO: improve by filling the delay slot.
       break;
     case Branch::kCall:
@@ -1495,7 +1803,7 @@
       // Note: the opposite condition branch encodes 8 as the distance, which is equal to the
       // number of instructions skipped:
       // (PUSH(IncreaseFrameSize(ADDIU) + SW) + NAL + LUI + ORI + ADDU + LW + JR).
-      EmitBcond(Branch::OppositeCondition(condition), lhs, rhs, 8);
+      EmitBcondR2(Branch::OppositeCondition(condition), lhs, rhs, 8);
       Push(RA);
       Nal();
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
@@ -1523,8 +1831,8 @@
       break;
     case Branch::kR6CondBranch:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      EmitBcondc(condition, lhs, rhs, offset);
-      Nop();  // TODO: improve by filling the forbidden slot.
+      EmitBcondR6(condition, lhs, rhs, offset);
+      Nop();  // TODO: improve by filling the forbidden/delay slot.
       break;
     case Branch::kR6Call:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
@@ -1540,7 +1848,7 @@
       Jic(AT, Low16Bits(offset));
       break;
     case Branch::kR6LongCondBranch:
-      EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2);
+      EmitBcondR6(Branch::OppositeCondition(condition), lhs, rhs, 2);
       offset += (offset & 0x8000) << 1;  // Account for sign extension in jic.
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
       Auipc(AT, High16Bits(offset));
@@ -1642,6 +1950,24 @@
   }
 }
 
+void MipsAssembler::Bc1f(int cc, MipsLabel* label) {
+  CHECK(IsUint<3>(cc)) << cc;
+  Bcond(label, kCondF, static_cast<Register>(cc), ZERO);
+}
+
+void MipsAssembler::Bc1t(int cc, MipsLabel* label) {
+  CHECK(IsUint<3>(cc)) << cc;
+  Bcond(label, kCondT, static_cast<Register>(cc), ZERO);
+}
+
+void MipsAssembler::Bc1eqz(FRegister ft, MipsLabel* label) {
+  Bcond(label, kCondF, static_cast<Register>(ft), ZERO);
+}
+
+void MipsAssembler::Bc1nez(FRegister ft, MipsLabel* label) {
+  Bcond(label, kCondT, static_cast<Register>(ft), ZERO);
+}
+
 void MipsAssembler::LoadFromOffset(LoadOperandType type, Register reg, Register base,
                                    int32_t offset) {
   // IsInt<16> must be passed a signed value.
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 4038c1f..f569aa8 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -72,8 +72,8 @@
       : scratch_(scratch), stack_adjust_(stack_adjust) {}
 
   MipsExceptionSlowPath(MipsExceptionSlowPath&& src)
-      : scratch_(std::move(src.scratch_)),
-        stack_adjust_(std::move(src.stack_adjust_)),
+      : scratch_(src.scratch_),
+        stack_adjust_(src.stack_adjust_),
         exception_entry_(std::move(src.exception_entry_)) {}
 
  private:
@@ -119,6 +119,7 @@
   void DivuR2(Register rd, Register rs, Register rt);  // R2
   void ModuR2(Register rd, Register rs, Register rt);  // R2
   void MulR6(Register rd, Register rs, Register rt);  // R6
+  void MuhR6(Register rd, Register rs, Register rt);  // R6
   void MuhuR6(Register rd, Register rs, Register rt);  // R6
   void DivR6(Register rd, Register rs, Register rt);  // R6
   void ModR6(Register rd, Register rs, Register rt);  // R6
@@ -133,14 +134,27 @@
   void Xori(Register rt, Register rs, uint16_t imm16);
   void Nor(Register rd, Register rs, Register rt);
 
+  void Movz(Register rd, Register rs, Register rt);  // R2
+  void Movn(Register rd, Register rs, Register rt);  // R2
+  void Seleqz(Register rd, Register rs, Register rt);  // R6
+  void Selnez(Register rd, Register rs, Register rt);  // R6
+  void ClzR6(Register rd, Register rs);
+  void ClzR2(Register rd, Register rs);
+  void CloR6(Register rd, Register rs);
+  void CloR2(Register rd, Register rs);
+
   void Seb(Register rd, Register rt);  // R2+
   void Seh(Register rd, Register rt);  // R2+
+  void Wsbh(Register rd, Register rt);  // R2+
+  void Bitswap(Register rd, Register rt);  // R6
 
   void Sll(Register rd, Register rt, int shamt);
   void Srl(Register rd, Register rt, int shamt);
+  void Rotr(Register rd, Register rt, int shamt);  // R2+
   void Sra(Register rd, Register rt, int shamt);
   void Sllv(Register rd, Register rt, Register rs);
   void Srlv(Register rd, Register rt, Register rs);
+  void Rotrv(Register rd, Register rt, Register rs);  // R2+
   void Srav(Register rd, Register rt, Register rs);
 
   void Lb(Register rt, Register rs, uint16_t imm16);
@@ -171,6 +185,8 @@
   void Bgez(Register rt, uint16_t imm16);
   void Blez(Register rt, uint16_t imm16);
   void Bgtz(Register rt, uint16_t imm16);
+  void Bc1f(int cc, uint16_t imm16);  // R2
+  void Bc1t(int cc, uint16_t imm16);  // R2
   void J(uint32_t addr26);
   void Jal(uint32_t addr26);
   void Jalr(Register rd, Register rs);
@@ -194,6 +210,8 @@
   void Bnec(Register rs, Register rt, uint16_t imm16);  // R6
   void Beqzc(Register rs, uint32_t imm21);  // R6
   void Bnezc(Register rs, uint32_t imm21);  // R6
+  void Bc1eqz(FRegister ft, uint16_t imm16);  // R6
+  void Bc1nez(FRegister ft, uint16_t imm16);  // R6
 
   void AddS(FRegister fd, FRegister fs, FRegister ft);
   void SubS(FRegister fd, FRegister fs, FRegister ft);
@@ -208,6 +226,43 @@
   void NegS(FRegister fd, FRegister fs);
   void NegD(FRegister fd, FRegister fs);
 
+  void CunS(int cc, FRegister fs, FRegister ft);  // R2
+  void CeqS(int cc, FRegister fs, FRegister ft);  // R2
+  void CueqS(int cc, FRegister fs, FRegister ft);  // R2
+  void ColtS(int cc, FRegister fs, FRegister ft);  // R2
+  void CultS(int cc, FRegister fs, FRegister ft);  // R2
+  void ColeS(int cc, FRegister fs, FRegister ft);  // R2
+  void CuleS(int cc, FRegister fs, FRegister ft);  // R2
+  void CunD(int cc, FRegister fs, FRegister ft);  // R2
+  void CeqD(int cc, FRegister fs, FRegister ft);  // R2
+  void CueqD(int cc, FRegister fs, FRegister ft);  // R2
+  void ColtD(int cc, FRegister fs, FRegister ft);  // R2
+  void CultD(int cc, FRegister fs, FRegister ft);  // R2
+  void ColeD(int cc, FRegister fs, FRegister ft);  // R2
+  void CuleD(int cc, FRegister fs, FRegister ft);  // R2
+  void CmpUnS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpEqS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUeqS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpLtS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUltS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpLeS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUleS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpOrS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUneS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpNeS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUnD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpEqD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUeqD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpLtD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUltD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpLeD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUleD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpOrD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUneD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpNeD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void Movf(Register rd, Register rs, int cc);  // R2
+  void Movt(Register rd, Register rs, int cc);  // R2
+
   void Cvtsw(FRegister fd, FRegister fs);
   void Cvtdw(FRegister fd, FRegister fs);
   void Cvtsd(FRegister fd, FRegister fs);
@@ -253,6 +308,10 @@
   void Bge(Register rs, Register rt, MipsLabel* label);
   void Bltu(Register rs, Register rt, MipsLabel* label);
   void Bgeu(Register rs, Register rt, MipsLabel* label);
+  void Bc1f(int cc, MipsLabel* label);  // R2
+  void Bc1t(int cc, MipsLabel* label);  // R2
+  void Bc1eqz(FRegister ft, MipsLabel* label);  // R6
+  void Bc1nez(FRegister ft, MipsLabel* label);  // R6
 
   void EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset, size_t size);
   void LoadFromOffset(LoadOperandType type, Register reg, Register base, int32_t offset);
@@ -282,7 +341,8 @@
   //
 
   // Emit code that will create an activation on the stack.
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
                   const std::vector<ManagedRegister>& callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
@@ -300,58 +360,85 @@
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
 
-  void StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm, ManagedRegister mscratch)
-      OVERRIDE;
+  void StoreImmediateToThread32(ThreadOffset<kMipsWordSize> dest,
+                                uint32_t imm,
+                                ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackOffsetToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs,
+  void StoreStackOffsetToThread32(ThreadOffset<kMipsWordSize> thr_offs,
+                                  FrameOffset fr_offs,
                                   ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackPointerToThread32(ThreadOffset<4> thr_offs) OVERRIDE;
+  void StoreStackPointerToThread32(ThreadOffset<kMipsWordSize> thr_offs) OVERRIDE;
 
-  void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
+  void StoreSpanning(FrameOffset dest,
+                     ManagedRegister msrc,
+                     FrameOffset in_off,
                      ManagedRegister mscratch) OVERRIDE;
 
   // Load routines.
   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
 
-  void LoadFromThread32(ManagedRegister mdest, ThreadOffset<4> src, size_t size) OVERRIDE;
+  void LoadFromThread32(ManagedRegister mdest,
+                        ThreadOffset<kMipsWordSize> src,
+                        size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
-  void LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
+  void LoadRef(ManagedRegister mdest,
+               ManagedRegister base,
+               MemberOffset offs,
                bool unpoison_reference) OVERRIDE;
 
   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
 
-  void LoadRawPtrFromThread32(ManagedRegister mdest, ThreadOffset<4> offs) OVERRIDE;
+  void LoadRawPtrFromThread32(ManagedRegister mdest, ThreadOffset<kMipsWordSize> offs) OVERRIDE;
 
   // Copying routines.
   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
 
-  void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset<4> thr_offs,
+  void CopyRawPtrFromThread32(FrameOffset fr_offs,
+                              ThreadOffset<kMipsWordSize> thr_offs,
                               ManagedRegister mscratch) OVERRIDE;
 
-  void CopyRawPtrToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs,
+  void CopyRawPtrToThread32(ThreadOffset<kMipsWordSize> thr_offs,
+                            FrameOffset fr_offs,
                             ManagedRegister mscratch) OVERRIDE;
 
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
 
   void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE;
 
-  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister mscratch,
+  void Copy(FrameOffset dest,
+            ManagedRegister src_base,
+            Offset src_offset,
+            ManagedRegister mscratch,
             size_t size) OVERRIDE;
 
-  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
-            ManagedRegister mscratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister mscratch,
+  void Copy(ManagedRegister dest_base,
+            Offset dest_offset,
+            FrameOffset src,
+            ManagedRegister mscratch,
             size_t size) OVERRIDE;
 
-  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
-            ManagedRegister mscratch, size_t size) OVERRIDE;
+  void Copy(FrameOffset dest,
+            FrameOffset src_base,
+            Offset src_offset,
+            ManagedRegister mscratch,
+            size_t size) OVERRIDE;
 
-  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-            ManagedRegister mscratch, size_t size) OVERRIDE;
+  void Copy(ManagedRegister dest,
+            Offset dest_offset,
+            ManagedRegister src,
+            Offset src_offset,
+            ManagedRegister mscratch,
+            size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest,
+            Offset dest_offset,
+            FrameOffset src,
+            Offset src_offset,
+            ManagedRegister mscratch,
+            size_t size) OVERRIDE;
 
   void MemoryBarrier(ManagedRegister) OVERRIDE;
 
@@ -369,13 +456,17 @@
   // value is null and null_allowed. in_reg holds a possibly stale reference
   // that can be used to avoid loading the handle scope entry to see if the value is
   // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
+  void CreateHandleScopeEntry(ManagedRegister out_reg,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister in_reg,
+                              bool null_allowed) OVERRIDE;
 
   // Set up out_off to hold an Object** into the handle scope, or to be null if the
   // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                              ManagedRegister mscratch, bool null_allowed) OVERRIDE;
+  void CreateHandleScopeEntry(FrameOffset out_off,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister mscratch,
+                              bool null_allowed) OVERRIDE;
 
   // src holds a handle scope entry (Object**); load it into dst.
   void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
@@ -388,7 +479,7 @@
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
-  void CallFromThread32(ThreadOffset<4> offset, ManagedRegister mscratch) OVERRIDE;
+  void CallFromThread32(ThreadOffset<kMipsWordSize> offset, ManagedRegister mscratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to an ExceptionSlowPath if it is.
@@ -423,6 +514,8 @@
     kCondNEZ,
     kCondLTU,
     kCondGEU,
+    kCondF,    // Floating-point predicate false.
+    kCondT,    // Floating-point predicate true.
     kUncond,
   };
   friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs);
@@ -529,7 +622,22 @@
     //
     // Composite branches (made of several instructions) with longer reach have 32-bit
     // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first).
-    // The composite branches cover the range of PC + +/-2GB.
+    // The composite branches cover a range of PC +/- 2GB on MIPS32 CPUs. However,
+    // the range is not end-to-end on MIPS64 (unless addresses are forced to zero- or
+    // sign-extend from 32 to 64 bits by the appropriate CPU configuration).
+    // Consider the following implementation of a long unconditional branch, for
+    // example:
+    //
+    //   auipc at, offset_31_16  // at = pc + sign_extend(offset_31_16) << 16
+    //   jic   at, offset_15_0   // pc = at + sign_extend(offset_15_0)
+    //
+    // Both of the above instructions take 16-bit signed offsets as immediate operands.
+    // When bit 15 of offset_15_0 is 1, it effectively causes subtraction of 0x10000
+    // due to sign extension. This must be compensated for by incrementing offset_31_16
+    // by 1. offset_31_16 can only be incremented by 1 if it's not 0x7FFF. If it is
+    // 0x7FFF, adding 1 will overflow the positive offset into the negative range.
+    // Therefore, the long branch range is from PC - 0x80000000 to
+    // PC + 0x7FFF7FFF, i.e. shorter by 32KB on the positive side.
     //
     // The returned values are therefore: 18, 21, 23, 28 and 32. There's also a special
     // case with the addiu instruction and a 16 bit offset.
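A standalone check of the compensation described above, mirroring the "offset += (offset & 0x8000) << 1" adjustment that the R6 long branches in assembler_mips.cc apply before splitting the offset into the auipc/jic immediates:

#include <cassert>
#include <cstdint>

int main() {
  for (int64_t offset : {0x00017FFFll, 0x00018000ll, -0x00018000ll}) {
    uint32_t adjusted = static_cast<uint32_t>(offset);
    adjusted += (adjusted & 0x8000) << 1;  // compensate jic's sign extension
    int32_t high = static_cast<int16_t>(adjusted >> 16);     // auipc immediate
    int32_t low  = static_cast<int16_t>(adjusted & 0xFFFF);  // jic immediate
    // auipc adds high << 16 to PC; jic then adds the sign-extended low half.
    assert((static_cast<int64_t>(high) << 16) + low == offset);
  }
  return 0;
}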
@@ -566,17 +674,17 @@
     // Helper for the above.
     void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);
 
-    uint32_t old_location_;          // Offset into assembler buffer in bytes.
-    uint32_t location_;              // Offset into assembler buffer in bytes.
-    uint32_t target_;                // Offset into assembler buffer in bytes.
+    uint32_t old_location_;      // Offset into assembler buffer in bytes.
+    uint32_t location_;          // Offset into assembler buffer in bytes.
+    uint32_t target_;            // Offset into assembler buffer in bytes.
 
-    uint32_t lhs_reg_ : 5;           // Left-hand side register in conditional branches or
-                                     // indirect call register.
-    uint32_t rhs_reg_ : 5;           // Right-hand side register in conditional branches.
-    BranchCondition condition_ : 5;  // Condition for conditional branches.
+    uint32_t lhs_reg_;           // Left-hand side register in conditional branches or
+                                 // indirect call register.
+    uint32_t rhs_reg_;           // Right-hand side register in conditional branches.
+    BranchCondition condition_;  // Condition for conditional branches.
 
-    Type type_ : 5;                  // Current type of the branch.
-    Type old_type_ : 5;              // Initial type of the branch.
+    Type type_;                  // Current type of the branch.
+    Type old_type_;              // Initial type of the branch.
   };
   friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs);
   friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs);
@@ -587,8 +695,8 @@
   void EmitI26(int opcode, uint32_t imm26);
   void EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd, int funct);
   void EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm);
-  void EmitBcond(BranchCondition cond, Register rs, Register rt, uint16_t imm16);
-  void EmitBcondc(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21);  // R6
+  void EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16);
+  void EmitBcondR6(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21);
 
   void Buncond(MipsLabel* label);
   void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO);
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index 063d8bd..6f8b3e8 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -21,6 +21,8 @@
 #include "base/stl_util.h"
 #include "utils/assembler_test.h"
 
+#define __ GetAssembler()->
+
 namespace art {
 
 struct MIPSCpuRegisterCompare {
@@ -184,6 +186,63 @@
     return result;
   }
 
+  void BranchCondOneRegHelper(void (mips::MipsAssembler::*f)(mips::Register,
+                                                             mips::MipsLabel*),
+                              std::string instr_name) {
+    mips::MipsLabel label;
+    (Base::GetAssembler()->*f)(mips::A0, &label);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    (Base::GetAssembler()->*f)(mips::A1, &label);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $a0, 1f\n"
+        "nop\n" +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $a1, 1b\n"
+        "nop\n";
+    DriverStr(expected, instr_name);
+  }
+
+  void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register,
+                                                              mips::Register,
+                                                              mips::MipsLabel*),
+                               std::string instr_name) {
+    mips::MipsLabel label;
+    (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $a0, $a1, 1f\n"
+        "nop\n" +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $a2, $a3, 1b\n"
+        "nop\n";
+    DriverStr(expected, instr_name);
+  }
+
  private:
   std::vector<mips::Register*> registers_;
   std::map<mips::Register, std::string, MIPSCpuRegisterCompare> secondary_register_names_;
@@ -196,8 +255,6 @@
   EXPECT_TRUE(CheckTools());
 }
 
-#define __ GetAssembler()->
-
 TEST_F(AssemblerMIPSTest, Addu) {
   DriverStr(RepeatRRR(&mips::MipsAssembler::Addu, "addu ${reg1}, ${reg2}, ${reg3}"), "Addu");
 }
@@ -418,6 +475,84 @@
   DriverStr(RepeatFF(&mips::MipsAssembler::NegD, "neg.d ${reg1}, ${reg2}"), "NegD");
 }
 
+TEST_F(AssemblerMIPSTest, CunS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CunS, 3, "c.un.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "CunS");
+}
+
+TEST_F(AssemblerMIPSTest, CeqS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CeqS, 3, "c.eq.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "CeqS");
+}
+
+TEST_F(AssemblerMIPSTest, CueqS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CueqS, 3, "c.ueq.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "CueqS");
+}
+
+TEST_F(AssemblerMIPSTest, ColtS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::ColtS, 3, "c.olt.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "ColtS");
+}
+
+TEST_F(AssemblerMIPSTest, CultS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CultS, 3, "c.ult.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "CultS");
+}
+
+TEST_F(AssemblerMIPSTest, ColeS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::ColeS, 3, "c.ole.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "ColeS");
+}
+
+TEST_F(AssemblerMIPSTest, CuleS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CuleS, 3, "c.ule.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "CuleS");
+}
+
+TEST_F(AssemblerMIPSTest, CunD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CunD, 3, "c.un.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "CunD");
+}
+
+TEST_F(AssemblerMIPSTest, CeqD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CeqD, 3, "c.eq.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "CeqD");
+}
+
+TEST_F(AssemblerMIPSTest, CueqD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CueqD, 3, "c.ueq.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "CueqD");
+}
+
+TEST_F(AssemblerMIPSTest, ColtD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::ColtD, 3, "c.olt.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "ColtD");
+}
+
+TEST_F(AssemblerMIPSTest, CultD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CultD, 3, "c.ult.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "CultD");
+}
+
+TEST_F(AssemblerMIPSTest, ColeD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::ColeD, 3, "c.ole.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "ColeD");
+}
+
+TEST_F(AssemblerMIPSTest, CuleD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CuleD, 3, "c.ule.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "CuleD");
+}
+
+TEST_F(AssemblerMIPSTest, Movf) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Movf, 3, "movf ${reg1}, ${reg2}, $fcc{imm}"), "Movf");
+}
+
+TEST_F(AssemblerMIPSTest, Movt) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Movt, 3, "movt ${reg1}, ${reg2}, $fcc{imm}"), "Movt");
+}
+
 TEST_F(AssemblerMIPSTest, CvtSW) {
   DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "CvtSW");
 }
@@ -1000,55 +1135,11 @@
 }
 
 TEST_F(AssemblerMIPSTest, Beq) {
-  mips::MipsLabel label;
-  __ Beq(mips::A0, mips::A1, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Beq(mips::A2, mips::A3, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "beq $a0, $a1, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "beq $a2, $a3, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Beq");
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq");
 }
 
 TEST_F(AssemblerMIPSTest, Bne) {
-  mips::MipsLabel label;
-  __ Bne(mips::A0, mips::A1, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bne(mips::A2, mips::A3, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "bne $a0, $a1, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "bne $a2, $a3, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bne");
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne");
 }
 
 TEST_F(AssemblerMIPSTest, Beqz) {
@@ -1104,107 +1195,19 @@
 }
 
 TEST_F(AssemblerMIPSTest, Bltz) {
-  mips::MipsLabel label;
-  __ Bltz(mips::A0, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bltz(mips::A1, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "bltz $a0, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "bltz $a1, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bltz");
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz");
 }
 
 TEST_F(AssemblerMIPSTest, Bgez) {
-  mips::MipsLabel label;
-  __ Bgez(mips::A0, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bgez(mips::A1, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "bgez $a0, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "bgez $a1, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bgez");
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez");
 }
 
 TEST_F(AssemblerMIPSTest, Blez) {
-  mips::MipsLabel label;
-  __ Blez(mips::A0, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Blez(mips::A1, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "blez $a0, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "blez $a1, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Blez");
+  BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez");
 }
 
 TEST_F(AssemblerMIPSTest, Bgtz) {
-  mips::MipsLabel label;
-  __ Bgtz(mips::A0, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bgtz(mips::A1, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "bgtz $a0, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "bgtz $a1, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bgtz");
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz");
 }
 
 TEST_F(AssemblerMIPSTest, Blt) {
@@ -1319,6 +1322,58 @@
   DriverStr(expected, "Bgeu");
 }
 
+TEST_F(AssemblerMIPSTest, Bc1f) {
+  mips::MipsLabel label;
+  __ Bc1f(0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bc1f(7, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bc1f $fcc0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bc1f $fcc7, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bc1f");
+}
+
+TEST_F(AssemblerMIPSTest, Bc1t) {
+  mips::MipsLabel label;
+  __ Bc1t(0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bc1t(7, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bc1t $fcc0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bc1t $fcc7, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bc1t");
+}
+
 #undef __
 
 }  // namespace art
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index ba2525e..cfd8421 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -19,15 +19,73 @@
 #include "base/bit_utils.h"
 #include "base/casts.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "memory_region.h"
 #include "thread.h"
 
 namespace art {
 namespace mips64 {
 
+void Mips64Assembler::FinalizeCode() {
+  for (auto& exception_block : exception_blocks_) {
+    EmitExceptionPoll(&exception_block);
+  }
+  PromoteBranches();
+}
+
+void Mips64Assembler::FinalizeInstructions(const MemoryRegion& region) {
+  EmitBranches();
+  Assembler::FinalizeInstructions(region);
+  PatchCFI();
+}
+
+void Mips64Assembler::PatchCFI() {
+  if (cfi().NumberOfDelayedAdvancePCs() == 0u) {
+    return;
+  }
+
+  typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC;
+  const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC();
+  const std::vector<uint8_t>& old_stream = data.first;
+  const std::vector<DelayedAdvancePC>& advances = data.second;
+
+  // Refill our data buffer with patched opcodes.
+  cfi().ReserveCFIStream(old_stream.size() + advances.size() + 16);
+  size_t stream_pos = 0;
+  for (const DelayedAdvancePC& advance : advances) {
+    DCHECK_GE(advance.stream_pos, stream_pos);
+    // Copy old data up to the point where advance was issued.
+    cfi().AppendRawData(old_stream, stream_pos, advance.stream_pos);
+    stream_pos = advance.stream_pos;
+    // Insert the advance command with its final offset.
+    size_t final_pc = GetAdjustedPosition(advance.pc);
+    cfi().AdvancePC(final_pc);
+  }
+  // Copy the final segment if any.
+  cfi().AppendRawData(old_stream, stream_pos, old_stream.size());
+}
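
// A rough sketch (illustrative, not part of this patch) of what the patching above
// achieves: suppose one branch grew by 8 bytes during PromoteBranches() and a CFI
// advance was delayed at pc 0x100. The old opcode stream is copied verbatim up to
// the advance's stream position, and the advance is then re-emitted with
// GetAdjustedPosition(0x100) == 0x108, so the CFI rows stay in sync with the code
// after branch expansion.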
+
+void Mips64Assembler::EmitBranches() {
+  CHECK(!overwriting_);
+  // Switch from appending instructions at the end of the buffer to overwriting
+  // existing instructions (branch placeholders) in the buffer.
+  overwriting_ = true;
+  for (auto& branch : branches_) {
+    EmitBranch(&branch);
+  }
+  overwriting_ = false;
+}
+
 void Mips64Assembler::Emit(uint32_t value) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  buffer_.Emit<uint32_t>(value);
+  if (overwriting_) {
+    // Branches to labels are emitted into their placeholders here.
+    buffer_.Store<uint32_t>(overwrite_location_, value);
+    overwrite_location_ += sizeof(uint32_t);
+  } else {
+    // Other instructions are simply appended at the end here.
+    AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+    buffer_.Emit<uint32_t>(value);
+  }
 }
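
// A minimal sketch of the two-phase emission that the overwriting_ flag enables:
// during code generation a branch only reserves placeholder words (see
// FinalizeLabeledBranch()); at finalization EmitBranches() seeks back and re-emits
// the real encoding in place, roughly:
//
//   overwrite_location_ = branch->GetLocation();  // seek to the placeholder
//   Emit(encoding);  // Store<>() into the buffer instead of appending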
 
 void Mips64Assembler::EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd,
@@ -82,15 +140,16 @@
 
 void Mips64Assembler::EmitI21(int opcode, GpuRegister rs, uint32_t imm21) {
   CHECK_NE(rs, kNoGpuRegister);
+  CHECK(IsUint<21>(imm21)) << imm21;
   uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
                       static_cast<uint32_t>(rs) << kRsShift |
-                      (imm21 & 0x1FFFFF);
+                      imm21;
   Emit(encoding);
 }
 
-void Mips64Assembler::EmitJ(int opcode, uint32_t addr26) {
-  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
-                      (addr26 & 0x3FFFFFF);
+void Mips64Assembler::EmitI26(int opcode, uint32_t imm26) {
+  CHECK(IsUint<26>(imm26)) << imm26;
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | imm26;
   Emit(encoding);
 }
 
@@ -428,26 +487,6 @@
   EmitI(0xb, rs, rt, imm16);
 }
 
-void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
-  EmitI(0x4, rs, rt, imm16);
-  Nop();
-}
-
-void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
-  EmitI(0x5, rs, rt, imm16);
-  Nop();
-}
-
-void Mips64Assembler::J(uint32_t addr26) {
-  EmitJ(0x2, addr26);
-  Nop();
-}
-
-void Mips64Assembler::Jal(uint32_t addr26) {
-  EmitJ(0x3, addr26);
-  Nop();
-}
-
 void Mips64Assembler::Seleqz(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 0, 0x35);
 }
@@ -474,7 +513,6 @@
 
 void Mips64Assembler::Jalr(GpuRegister rd, GpuRegister rs) {
   EmitR(0, rs, static_cast<GpuRegister>(0), rd, 0, 0x09);
-  Nop();
 }
 
 void Mips64Assembler::Jalr(GpuRegister rs) {
@@ -489,6 +527,15 @@
   EmitI(0x3B, rs, static_cast<GpuRegister>(0x1E), imm16);
 }
 
+void Mips64Assembler::Addiupc(GpuRegister rs, uint32_t imm19) {
+  CHECK(IsUint<19>(imm19)) << imm19;
+  EmitI21(0x3B, rs, imm19);
+}
+
+void Mips64Assembler::Bc(uint32_t imm26) {
+  EmitI26(0x32, imm26);
+}
+
 void Mips64Assembler::Jic(GpuRegister rt, uint16_t imm16) {
   EmitI(0x36, static_cast<GpuRegister>(0), rt, imm16);
 }
@@ -549,14 +596,14 @@
   CHECK_NE(rs, ZERO);
   CHECK_NE(rt, ZERO);
   CHECK_NE(rs, rt);
-  EmitI(0x8, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16);
+  EmitI(0x8, std::min(rs, rt), std::max(rs, rt), imm16);
 }
 
 void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
   CHECK_NE(rs, ZERO);
   CHECK_NE(rt, ZERO);
   CHECK_NE(rs, rt);
-  EmitI(0x18, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16);
+  EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16);
 }
 
 void Mips64Assembler::Beqzc(GpuRegister rs, uint32_t imm21) {
@@ -569,6 +616,81 @@
   EmitI21(0x3E, rs, imm21);
 }
 
+void Mips64Assembler::Bc1eqz(FpuRegister ft, uint16_t imm16) {
+  EmitFI(0x11, 0x9, ft, imm16);
+}
+
+void Mips64Assembler::Bc1nez(FpuRegister ft, uint16_t imm16) {
+  EmitFI(0x11, 0xD, ft, imm16);
+}
+
+void Mips64Assembler::EmitBcondc(BranchCondition cond,
+                                 GpuRegister rs,
+                                 GpuRegister rt,
+                                 uint32_t imm16_21) {
+  switch (cond) {
+    case kCondLT:
+      Bltc(rs, rt, imm16_21);
+      break;
+    case kCondGE:
+      Bgec(rs, rt, imm16_21);
+      break;
+    case kCondLE:
+      Bgec(rt, rs, imm16_21);
+      break;
+    case kCondGT:
+      Bltc(rt, rs, imm16_21);
+      break;
+    case kCondLTZ:
+      CHECK_EQ(rt, ZERO);
+      Bltzc(rs, imm16_21);
+      break;
+    case kCondGEZ:
+      CHECK_EQ(rt, ZERO);
+      Bgezc(rs, imm16_21);
+      break;
+    case kCondLEZ:
+      CHECK_EQ(rt, ZERO);
+      Blezc(rs, imm16_21);
+      break;
+    case kCondGTZ:
+      CHECK_EQ(rt, ZERO);
+      Bgtzc(rs, imm16_21);
+      break;
+    case kCondEQ:
+      Beqc(rs, rt, imm16_21);
+      break;
+    case kCondNE:
+      Bnec(rs, rt, imm16_21);
+      break;
+    case kCondEQZ:
+      CHECK_EQ(rt, ZERO);
+      Beqzc(rs, imm16_21);
+      break;
+    case kCondNEZ:
+      CHECK_EQ(rt, ZERO);
+      Bnezc(rs, imm16_21);
+      break;
+    case kCondLTU:
+      Bltuc(rs, rt, imm16_21);
+      break;
+    case kCondGEU:
+      Bgeuc(rs, rt, imm16_21);
+      break;
+    case kCondF:
+      CHECK_EQ(rt, ZERO);
+      Bc1eqz(static_cast<FpuRegister>(rs), imm16_21);
+      break;
+    case kCondT:
+      CHECK_EQ(rt, ZERO);
+      Bc1nez(static_cast<FpuRegister>(rs), imm16_21);
+      break;
+    case kUncond:
+      LOG(FATAL) << "Unexpected branch condition " << cond;
+      UNREACHABLE();
+  }
+}
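
// MIPS64R6 has no dedicated "<=" and ">" register-register branches, so kCondLE
// and kCondGT are synthesized above by swapping operands. For example,
//
//   EmitBcondc(kCondLE, A0, A1, offset)  // branch if a0 <= a1
//
// assembles to "bgec $a1, $a0, offset", which is taken exactly when a1 >= a0.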
+
 void Mips64Assembler::AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
   EmitFR(0x11, 0x10, ft, fs, fd, 0x0);
 }
@@ -721,6 +843,86 @@
   EmitFR(0x11, 0x11, ft, fs, fd, 0x1e);
 }
 
+void Mips64Assembler::CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x01);
+}
+
+void Mips64Assembler::CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x02);
+}
+
+void Mips64Assembler::CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x03);
+}
+
+void Mips64Assembler::CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x04);
+}
+
+void Mips64Assembler::CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x05);
+}
+
+void Mips64Assembler::CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x06);
+}
+
+void Mips64Assembler::CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x07);
+}
+
+void Mips64Assembler::CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x11);
+}
+
+void Mips64Assembler::CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x12);
+}
+
+void Mips64Assembler::CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x13);
+}
+
+void Mips64Assembler::CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x01);
+}
+
+void Mips64Assembler::CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x02);
+}
+
+void Mips64Assembler::CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x03);
+}
+
+void Mips64Assembler::CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x04);
+}
+
+void Mips64Assembler::CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x05);
+}
+
+void Mips64Assembler::CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x06);
+}
+
+void Mips64Assembler::CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x07);
+}
+
+void Mips64Assembler::CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x11);
+}
+
+void Mips64Assembler::CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x12);
+}
+
+void Mips64Assembler::CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x13);
+}
+
 void Mips64Assembler::Cvtsw(FpuRegister fd, FpuRegister fs) {
   EmitFR(0x11, 0x14, static_cast<FpuRegister>(0), fs, fd, 0x20);
 }
@@ -925,15 +1127,6 @@
   }
 }
 
-void Mips64Assembler::Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp) {
-  if (IsInt<16>(value)) {
-    Addiu(rt, rs, value);
-  } else {
-    LoadConst32(rtmp, value);
-    Addu(rt, rs, rtmp);
-  }
-}
-
 void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) {
   if (IsInt<16>(value)) {
     Daddiu(rt, rs, value);
@@ -943,177 +1136,637 @@
   }
 }
 
-//
-// MIPS64R6 branches
-//
-//
-// Unconditional (pc + 32-bit signed offset):
-//
-//   auipc    at, ofs_high
-//   jic      at, ofs_low
-//   // no delay/forbidden slot
-//
-//
-// Conditional (pc + 32-bit signed offset):
-//
-//   b<cond>c   reg, +2      // skip next 2 instructions
-//   auipc      at, ofs_high
-//   jic        at, ofs_low
-//   // no delay/forbidden slot
-//
-//
-// Unconditional (pc + 32-bit signed offset) and link:
-//
-//   auipc    reg, ofs_high
-//   daddiu   reg, ofs_low
-//   jialc    reg, 0
-//   // no delay/forbidden slot
-//
-//
-// TODO: use shorter instruction sequences whenever possible.
-//
+void Mips64Assembler::Branch::InitShortOrLong(Mips64Assembler::Branch::OffsetBits offset_size,
+                                              Mips64Assembler::Branch::Type short_type,
+                                              Mips64Assembler::Branch::Type long_type) {
+  type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type;
+}
 
-void Mips64Assembler::Bind(Label* label) {
+void Mips64Assembler::Branch::InitializeType(bool is_call) {
+  OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_);
+  if (is_call) {
+    InitShortOrLong(offset_size, kCall, kLongCall);
+  } else if (condition_ == kUncond) {
+    InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
+  } else {
+    if (condition_ == kCondEQZ || condition_ == kCondNEZ) {
+      // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
+      type_ = (offset_size <= kOffset23) ? kCondBranch : kLongCondBranch;
+    } else {
+      InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
+    }
+  }
+  old_type_ = type_;
+}
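
// The special case exists because beqzc/bnezc encode a 21-bit offset (a 23-bit
// byte range once shifted), versus the 16-bit offset of the other compact
// branches, so they remain short over a wider distance. GetOffsetSize() below
// applies the same exception.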
+
+bool Mips64Assembler::Branch::IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs) {
+  switch (condition) {
+    case kCondLT:
+    case kCondGT:
+    case kCondNE:
+    case kCondLTU:
+      return lhs == rhs;
+    default:
+      return false;
+  }
+}
+
+bool Mips64Assembler::Branch::IsUncond(BranchCondition condition,
+                                       GpuRegister lhs,
+                                       GpuRegister rhs) {
+  switch (condition) {
+    case kUncond:
+      return true;
+    case kCondGE:
+    case kCondLE:
+    case kCondEQ:
+    case kCondGEU:
+      return lhs == rhs;
+    default:
+      return false;
+  }
+}
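
// These predicates catch the degenerate lhs == rhs cases that the compact R6
// encodings cannot express (e.g. beqc/bnec require rs != rt). For example:
//
//   Bltc(A0, A0, &label);  // a0 < a0 never holds: Bcond() emits nothing
//   Bgec(A0, A0, &label);  // a0 >= a0 always holds: the branch is created
//                          // with condition_ = kUncond instead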
+
+Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target)
+    : old_location_(location),
+      location_(location),
+      target_(target),
+      lhs_reg_(ZERO),
+      rhs_reg_(ZERO),
+      condition_(kUncond) {
+  InitializeType(false);
+}
+
+Mips64Assembler::Branch::Branch(uint32_t location,
+                                uint32_t target,
+                                Mips64Assembler::BranchCondition condition,
+                                GpuRegister lhs_reg,
+                                GpuRegister rhs_reg)
+    : old_location_(location),
+      location_(location),
+      target_(target),
+      lhs_reg_(lhs_reg),
+      rhs_reg_(rhs_reg),
+      condition_(condition) {
+  CHECK_NE(condition, kUncond);
+  switch (condition) {
+    case kCondEQ:
+    case kCondNE:
+    case kCondLT:
+    case kCondGE:
+    case kCondLE:
+    case kCondGT:
+    case kCondLTU:
+    case kCondGEU:
+      CHECK_NE(lhs_reg, ZERO);
+      CHECK_NE(rhs_reg, ZERO);
+      break;
+    case kCondLTZ:
+    case kCondGEZ:
+    case kCondLEZ:
+    case kCondGTZ:
+    case kCondEQZ:
+    case kCondNEZ:
+      CHECK_NE(lhs_reg, ZERO);
+      CHECK_EQ(rhs_reg, ZERO);
+      break;
+    case kCondF:
+    case kCondT:
+      CHECK_EQ(rhs_reg, ZERO);
+      break;
+    case kUncond:
+      UNREACHABLE();
+  }
+  CHECK(!IsNop(condition, lhs_reg, rhs_reg));
+  if (IsUncond(condition, lhs_reg, rhs_reg)) {
+    // Branch condition is always true, make the branch unconditional.
+    condition_ = kUncond;
+  }
+  InitializeType(false);
+}
+
+Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg)
+    : old_location_(location),
+      location_(location),
+      target_(target),
+      lhs_reg_(indirect_reg),
+      rhs_reg_(ZERO),
+      condition_(kUncond) {
+  CHECK_NE(indirect_reg, ZERO);
+  CHECK_NE(indirect_reg, AT);
+  InitializeType(true);
+}
+
+Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition(
+    Mips64Assembler::BranchCondition cond) {
+  switch (cond) {
+    case kCondLT:
+      return kCondGE;
+    case kCondGE:
+      return kCondLT;
+    case kCondLE:
+      return kCondGT;
+    case kCondGT:
+      return kCondLE;
+    case kCondLTZ:
+      return kCondGEZ;
+    case kCondGEZ:
+      return kCondLTZ;
+    case kCondLEZ:
+      return kCondGTZ;
+    case kCondGTZ:
+      return kCondLEZ;
+    case kCondEQ:
+      return kCondNE;
+    case kCondNE:
+      return kCondEQ;
+    case kCondEQZ:
+      return kCondNEZ;
+    case kCondNEZ:
+      return kCondEQZ;
+    case kCondLTU:
+      return kCondGEU;
+    case kCondGEU:
+      return kCondLTU;
+    case kCondF:
+      return kCondT;
+    case kCondT:
+      return kCondF;
+    case kUncond:
+      LOG(FATAL) << "Unexpected branch condition " << cond;
+  }
+  UNREACHABLE();
+}
+
+Mips64Assembler::Branch::Type Mips64Assembler::Branch::GetType() const {
+  return type_;
+}
+
+Mips64Assembler::BranchCondition Mips64Assembler::Branch::GetCondition() const {
+  return condition_;
+}
+
+GpuRegister Mips64Assembler::Branch::GetLeftRegister() const {
+  return lhs_reg_;
+}
+
+GpuRegister Mips64Assembler::Branch::GetRightRegister() const {
+  return rhs_reg_;
+}
+
+uint32_t Mips64Assembler::Branch::GetTarget() const {
+  return target_;
+}
+
+uint32_t Mips64Assembler::Branch::GetLocation() const {
+  return location_;
+}
+
+uint32_t Mips64Assembler::Branch::GetOldLocation() const {
+  return old_location_;
+}
+
+uint32_t Mips64Assembler::Branch::GetLength() const {
+  return branch_info_[type_].length;
+}
+
+uint32_t Mips64Assembler::Branch::GetOldLength() const {
+  return branch_info_[old_type_].length;
+}
+
+uint32_t Mips64Assembler::Branch::GetSize() const {
+  return GetLength() * sizeof(uint32_t);
+}
+
+uint32_t Mips64Assembler::Branch::GetOldSize() const {
+  return GetOldLength() * sizeof(uint32_t);
+}
+
+uint32_t Mips64Assembler::Branch::GetEndLocation() const {
+  return GetLocation() + GetSize();
+}
+
+uint32_t Mips64Assembler::Branch::GetOldEndLocation() const {
+  return GetOldLocation() + GetOldSize();
+}
+
+bool Mips64Assembler::Branch::IsLong() const {
+  switch (type_) {
+    // Short branches.
+    case kUncondBranch:
+    case kCondBranch:
+    case kCall:
+      return false;
+    // Long branches.
+    case kLongUncondBranch:
+    case kLongCondBranch:
+    case kLongCall:
+      return true;
+  }
+  UNREACHABLE();
+}
+
+bool Mips64Assembler::Branch::IsResolved() const {
+  return target_ != kUnresolved;
+}
+
+Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSize() const {
+  OffsetBits offset_size =
+      (type_ == kCondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ))
+          ? kOffset23
+          : branch_info_[type_].offset_size;
+  return offset_size;
+}
+
+Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSizeNeeded(uint32_t location,
+                                                                                 uint32_t target) {
+  // For unresolved targets assume the shortest encoding
+  // (later it will be made longer if needed).
+  if (target == kUnresolved)
+    return kOffset16;
+  int64_t distance = static_cast<int64_t>(target) - location;
+  // To simplify calculations for composite branches consisting of multiple instructions,
+  // bump up the distance by a value larger than the max byte size of a composite branch.
+  distance += (distance >= 0) ? kMaxBranchSize : -kMaxBranchSize;
+  if (IsInt<kOffset16>(distance))
+    return kOffset16;
+  else if (IsInt<kOffset18>(distance))
+    return kOffset18;
+  else if (IsInt<kOffset21>(distance))
+    return kOffset21;
+  else if (IsInt<kOffset23>(distance))
+    return kOffset23;
+  else if (IsInt<kOffset28>(distance))
+    return kOffset28;
+  return kOffset32;
+}
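
// A worked example, assuming only that kMaxBranchSize is small (it merely has to
// exceed the byte size of the longest composite branch): a branch at location
// 0x1000 with target 0x11000 has a raw distance of 0x10000 (65536), which even
// after the conservative bump does not fit a signed 16-bit offset (max 32767)
// but easily fits 18 bits (max 131071). The result is kOffset18, i.e. a short
// b<cond>c (16-bit field shifted left by 2) can still reach the target.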
+
+void Mips64Assembler::Branch::Resolve(uint32_t target) {
+  target_ = target;
+}
+
+void Mips64Assembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) {
+  if (location_ > expand_location) {
+    location_ += delta;
+  }
+  if (!IsResolved()) {
+    return;  // Don't know the target yet.
+  }
+  if (target_ > expand_location) {
+    target_ += delta;
+  }
+}
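
// For example, if a branch at location 0x100 expands by 8 bytes, a later branch
// at 0x200 targeting 0x50 gets its location bumped to 0x208 while its target
// stays at 0x50: each field moves only if it lies past the expansion point.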
+
+void Mips64Assembler::Branch::PromoteToLong() {
+  switch (type_) {
+    // Short branches.
+    case kUncondBranch:
+      type_ = kLongUncondBranch;
+      break;
+    case kCondBranch:
+      type_ = kLongCondBranch;
+      break;
+    case kCall:
+      type_ = kLongCall;
+      break;
+    default:
+      // Note: 'type_' is already long.
+      break;
+  }
+  CHECK(IsLong());
+}
+
+uint32_t Mips64Assembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) {
+  // If the branch is still unresolved or already long, nothing to do.
+  if (IsLong() || !IsResolved()) {
+    return 0;
+  }
+  // Promote the short branch to long if the offset size is too small
+  // to hold the distance between location_ and target_.
+  if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) {
+    PromoteToLong();
+    uint32_t old_size = GetOldSize();
+    uint32_t new_size = GetSize();
+    CHECK_GT(new_size, old_size);
+    return new_size - old_size;
+  }
+  // The following logic is for debugging/testing purposes.
+  // Promote some short branches to long when it's not really required.
+  if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) {
+    int64_t distance = static_cast<int64_t>(target_) - location_;
+    distance = (distance >= 0) ? distance : -distance;
+    if (distance >= max_short_distance) {
+      PromoteToLong();
+      uint32_t old_size = GetOldSize();
+      uint32_t new_size = GetSize();
+      CHECK_GT(new_size, old_size);
+      return new_size - old_size;
+    }
+  }
+  return 0;
+}
+
+uint32_t Mips64Assembler::Branch::GetOffsetLocation() const {
+  return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t);
+}
+
+uint32_t Mips64Assembler::Branch::GetOffset() const {
+  CHECK(IsResolved());
+  uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize());
+  // Calculate the byte distance between instructions and also account for
+  // different PC-relative origins.
+  uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t);
+  // Prepare the offset for encoding into the instruction(s).
+  offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift;
+  return offset;
+}
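
// A worked example for a resolved kUncondBranch at location 0x100 targeting
// 0x200: the offset lives in instruction 0, so GetOffsetLocation() == 0x100;
// bc is PC+4-relative (pc_org == 1), giving offset = 0x200 - 0x100 - 4 = 0xFC.
// Masking to 28 bits and shifting right by offset_shift == 2 yields 0x3F, the
// value that lands in bc's 26-bit immediate field.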
+
+Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) {
+  CHECK_LT(branch_id, branches_.size());
+  return &branches_[branch_id];
+}
+
+const Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) const {
+  CHECK_LT(branch_id, branches_.size());
+  return &branches_[branch_id];
+}
+
+void Mips64Assembler::Bind(Mips64Label* label) {
   CHECK(!label->IsBound());
-  int32_t bound_pc = buffer_.Size();
+  uint32_t bound_pc = buffer_.Size();
 
-  // Walk the list of the branches (auipc + jic pairs) referring to and preceding this label.
-  // Embed the previously unknown pc-relative addresses in them.
+  // Walk the list of branches referring to and preceding this label.
+  // Store the previously unknown target addresses in them.
   while (label->IsLinked()) {
-    int32_t position = label->Position();
-    // Extract the branch (instruction pair)
-    uint32_t auipc = buffer_.Load<uint32_t>(position);
-    uint32_t jic = buffer_.Load<uint32_t>(position + 4);  // actually, jic or daddiu
+    uint32_t branch_id = label->Position();
+    Branch* branch = GetBranch(branch_id);
+    branch->Resolve(bound_pc);
 
-    // Extract the location of the previous pair in the list (walking the list backwards;
-    // the previous pair location was stored in the immediate operands of the instructions)
-    int32_t prev = (auipc << 16) | (jic & 0xFFFF);
-
-    // Get the pc-relative address
-    uint32_t offset = bound_pc - position;
-    offset += (offset & 0x8000) << 1;  // account for sign extension in jic/daddiu
-
-    // Embed it in the two instructions
-    auipc = (auipc & 0xFFFF0000) | (offset >> 16);
-    jic = (jic & 0xFFFF0000) | (offset & 0xFFFF);
-
-    // Save the adjusted instructions
-    buffer_.Store<uint32_t>(position, auipc);
-    buffer_.Store<uint32_t>(position + 4, jic);
+    uint32_t branch_location = branch->GetLocation();
+    // Extract the location of the previous branch in the list (walking the list backwards;
+    // the previous branch ID was stored in the space reserved for this branch).
+    uint32_t prev = buffer_.Load<uint32_t>(branch_location);
 
     // On to the previous branch in the list...
     label->position_ = prev;
   }
 
-  // Now make the label object contain its own location
-  // (it will be used by the branches referring to and following this label)
+  // Now make the label object contain its own location (relative to the end of the preceding
+  // branch, if any; it will be used by the branches referring to and following this label).
+  label->prev_branch_id_plus_one_ = branches_.size();
+  if (label->prev_branch_id_plus_one_) {
+    uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
+    const Branch* branch = GetBranch(branch_id);
+    bound_pc -= branch->GetEndLocation();
+  }
   label->BindTo(bound_pc);
 }
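
// In other words, the placeholder word of each branch to a still-unbound label
// doubles as a link: it holds the id of the previous branch to the same label
// (0 terminates the chain, see FinalizeLabeledBranch()). Bind() walks that
// chain, resolving every branch to bound_pc, and then records the label's
// position relative to the end of the most recent branch, so the recorded
// position stays valid when earlier branches expand (GetLabelLocation() adds
// the bias back).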
 
-void Mips64Assembler::B(Label* label) {
-  if (label->IsBound()) {
-    // Branch backwards (to a preceding label), distance is known
-    uint32_t offset = label->Position() - buffer_.Size();
-    CHECK_LE(static_cast<int32_t>(offset), 0);
-    offset += (offset & 0x8000) << 1;  // account for sign extension in jic
-    Auipc(AT, offset >> 16);
-    Jic(AT, offset);
-  } else {
-    // Branch forward (to a following label), distance is unknown
-    int32_t position = buffer_.Size();
-    // The first branch forward will have 0 in its pc-relative address (copied from label's
-    // position). It will be the terminator of the list of forward-reaching branches.
-    uint32_t prev = label->position_;
-    Auipc(AT, prev >> 16);
-    Jic(AT, prev);
-    // Now make the link object point to the location of this branch
-    // (this forms a linked list of branches preceding this label)
-    label->LinkTo(position);
+uint32_t Mips64Assembler::GetLabelLocation(Mips64Label* label) const {
+  CHECK(label->IsBound());
+  uint32_t target = label->Position();
+  if (label->prev_branch_id_plus_one_) {
+    // Get label location based on the branch preceding it.
+    uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
+    const Branch* branch = GetBranch(branch_id);
+    target += branch->GetEndLocation();
+  }
+  return target;
+}
+
+uint32_t Mips64Assembler::GetAdjustedPosition(uint32_t old_position) {
+  // We can reconstruct the adjustment by going through all the branches from the beginning
+  // up to the old_position. Since we expect GetAdjustedPosition() to be called in a loop
+  // with increasing old_position, we can use the data from the last call to continue where
+  // we left off, making the whole loop O(m+n), where m is the number of positions to adjust
+  // and n is the number of branches.
+  if (old_position < last_old_position_) {
+    last_position_adjustment_ = 0;
+    last_old_position_ = 0;
+    last_branch_id_ = 0;
+  }
+  while (last_branch_id_ != branches_.size()) {
+    const Branch* branch = GetBranch(last_branch_id_);
+    if (branch->GetLocation() >= old_position + last_position_adjustment_) {
+      break;
+    }
+    last_position_adjustment_ += branch->GetSize() - branch->GetOldSize();
+    ++last_branch_id_;
+  }
+  last_old_position_ = old_position;
+  return old_position + last_position_adjustment_;
+}
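
// For example, if the only branch grew from 4 to 12 bytes at location 0x40,
// then GetAdjustedPosition(0x30) == 0x30 while GetAdjustedPosition(0x100) ==
// 0x108. The cached last_* state makes a monotonically increasing sequence of
// queries (as issued by PatchCFI() above) amortized O(m+n) rather than O(m*n).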
+
+void Mips64Assembler::FinalizeLabeledBranch(Mips64Label* label) {
+  uint32_t length = branches_.back().GetLength();
+  if (!label->IsBound()) {
+    // Branch forward (to a following label), distance is unknown.
+    // The first branch forward will contain 0, serving as the terminator of
+    // the list of forward-reaching branches.
+    Emit(label->position_);
+    length--;
+    // Now make the label object point to this branch
+    // (this forms a linked list of branches preceding this label).
+    uint32_t branch_id = branches_.size() - 1;
+    label->LinkTo(branch_id);
+  }
+  // Reserve space for the branch.
+  while (length--) {
+    Nop();
   }
 }
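
// For instance, after Beqzc(reg, &label) on a still-unbound label (kCondBranch,
// length 2) the buffer holds one word linking to the previous branch on the
// label's chain followed by one nop; for an already-bound label it holds two
// nops. Either way, EmitBranch() later overwrites these placeholders with the
// final encoding.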
 
-void Mips64Assembler::Jalr(Label* label, GpuRegister indirect_reg) {
-  if (label->IsBound()) {
-    // Branch backwards (to a preceding label), distance is known
-    uint32_t offset = label->Position() - buffer_.Size();
-    CHECK_LE(static_cast<int32_t>(offset), 0);
-    offset += (offset & 0x8000) << 1;  // account for sign extension in daddiu
-    Auipc(indirect_reg, offset >> 16);
-    Daddiu(indirect_reg, indirect_reg, offset);
-    Jialc(indirect_reg, 0);
-  } else {
-    // Branch forward (to a following label), distance is unknown
-    int32_t position = buffer_.Size();
-    // The first branch forward will have 0 in its pc-relative address (copied from label's
-    // position). It will be the terminator of the list of forward-reaching branches.
-    uint32_t prev = label->position_;
-    Auipc(indirect_reg, prev >> 16);
-    Daddiu(indirect_reg, indirect_reg, prev);
-    Jialc(indirect_reg, 0);
-    // Now make the link object point to the location of this branch
-    // (this forms a linked list of branches preceding this label)
-    label->LinkTo(position);
+void Mips64Assembler::Buncond(Mips64Label* label) {
+  uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+  branches_.emplace_back(buffer_.Size(), target);
+  FinalizeLabeledBranch(label);
+}
+
+void Mips64Assembler::Bcond(Mips64Label* label,
+                            BranchCondition condition,
+                            GpuRegister lhs,
+                            GpuRegister rhs) {
+  // For some conditions, lhs == rhs makes the branch a NOP (never taken), so emit nothing.
+  if (Branch::IsNop(condition, lhs, rhs)) {
+    return;
+  }
+  uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+  branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs);
+  FinalizeLabeledBranch(label);
+}
+
+void Mips64Assembler::Call(Mips64Label* label, GpuRegister indirect_reg) {
+  uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+  branches_.emplace_back(buffer_.Size(), target, indirect_reg);
+  FinalizeLabeledBranch(label);
+}
+
+void Mips64Assembler::PromoteBranches() {
+  // Promote short branches to long as necessary.
+  bool changed;
+  do {
+    changed = false;
+    for (auto& branch : branches_) {
+      CHECK(branch.IsResolved());
+      uint32_t delta = branch.PromoteIfNeeded();
+      // If this branch has been promoted and needs to expand in size,
+      // relocate all branches by the expansion size.
+      if (delta) {
+        changed = true;
+        uint32_t expand_location = branch.GetLocation();
+        for (auto& branch2 : branches_) {
+          branch2.Relocate(expand_location, delta);
+        }
+      }
+    }
+  } while (changed);
+
+  // Account for branch expansion by resizing the code buffer
+  // and moving the code in it to its final location.
+  size_t branch_count = branches_.size();
+  if (branch_count > 0) {
+    // Resize.
+    Branch& last_branch = branches_[branch_count - 1];
+    uint32_t size_delta = last_branch.GetEndLocation() - last_branch.GetOldEndLocation();
+    uint32_t old_size = buffer_.Size();
+    buffer_.Resize(old_size + size_delta);
+    // Move the code residing between branch placeholders.
+    uint32_t end = old_size;
+    for (size_t i = branch_count; i > 0; ) {
+      Branch& branch = branches_[--i];
+      uint32_t size = end - branch.GetOldEndLocation();
+      buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size);
+      end = branch.GetOldLocation();
+    }
   }
 }
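
// Two details worth noting: the promotion pass iterates to a fixed point because
// lengthening one branch can push another out of its short range, and the final
// copy walks the buffer backwards (last branch first) so that each chunk of code
// between placeholders moves into space that has already been vacated, never
// clobbering bytes that still need to be moved.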
 
-void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Label* label) {
-  Bgec(rs, rt, 2);
-  B(label);
+// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
+const Mips64Assembler::Branch::BranchInfo Mips64Assembler::Branch::branch_info_[] = {
+  // Short branches.
+  {  1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 },  // kUncondBranch
+  {  2, 0, 1, Mips64Assembler::Branch::kOffset18, 2 },  // kCondBranch
+                                                        // Exception: kOffset23 for beqzc/bnezc
+  {  2, 0, 0, Mips64Assembler::Branch::kOffset21, 2 },  // kCall
+  // Long branches.
+  {  2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 },  // kLongUncondBranch
+  {  3, 1, 0, Mips64Assembler::Branch::kOffset32, 0 },  // kLongCondBranch
+  {  3, 0, 0, Mips64Assembler::Branch::kOffset32, 0 },  // kLongCall
+};
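
// Reading the table, each row is (in the field order used by the accessors
// above): instruction count (length), index of the instruction holding the
// offset (instr_offset), distance in instructions from that slot to the
// PC-relative origin (pc_org), reachable offset width (offset_size), and how
// far the hardware shifts the encoded offset left (offset_shift). For example,
// kCall = { 2, 0, 0, kOffset21, 2 }: addiupc + jialc, where addiupc holds a
// 19-bit immediate shifted left by 2 and relative to its own PC.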
+
+// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
+void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) {
+  CHECK(overwriting_);
+  overwrite_location_ = branch->GetLocation();
+  uint32_t offset = branch->GetOffset();
+  BranchCondition condition = branch->GetCondition();
+  GpuRegister lhs = branch->GetLeftRegister();
+  GpuRegister rhs = branch->GetRightRegister();
+  switch (branch->GetType()) {
+    // Short branches.
+    case Branch::kUncondBranch:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Bc(offset);
+      break;
+    case Branch::kCondBranch:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      EmitBcondc(condition, lhs, rhs, offset);
+      Nop();  // TODO: improve by filling the forbidden/delay slot.
+      break;
+    case Branch::kCall:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Addiupc(lhs, offset);
+      Jialc(lhs, 0);
+      break;
+
+    // Long branches.
+    case Branch::kLongUncondBranch:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in jic.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(AT, High16Bits(offset));
+      Jic(AT, Low16Bits(offset));
+      break;
+    case Branch::kLongCondBranch:
+      EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2);
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in jic.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(AT, High16Bits(offset));
+      Jic(AT, Low16Bits(offset));
+      break;
+    case Branch::kLongCall:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in daddiu.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(lhs, High16Bits(offset));
+      Daddiu(lhs, lhs, Low16Bits(offset));
+      Jialc(lhs, 0);
+      break;
+  }
+  CHECK_EQ(overwrite_location_, branch->GetEndLocation());
+  CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize));
 }
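
// The "offset += (offset & 0x8000) << 1" compensation above, worked through:
// jic/daddiu sign-extend their 16-bit immediate, so when bit 15 of the low half
// is set, the addition effectively subtracts 0x10000 from what auipc loaded.
// For a byte offset of 0x18000: without the fix, auipc 0x0001 + jic 0x8000
// yields 0x10000 - 0x8000 = 0x8000 (wrong); with it, the offset becomes
// 0x28000, so auipc 0x0002 + jic 0x8000 yields 0x20000 - 0x8000 = 0x18000.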
 
-void Mips64Assembler::Bltzc(GpuRegister rt, Label* label) {
-  Bgezc(rt, 2);
-  B(label);
+void Mips64Assembler::Bc(Mips64Label* label) {
+  Buncond(label);
 }
 
-void Mips64Assembler::Bgtzc(GpuRegister rt, Label* label) {
-  Blezc(rt, 2);
-  B(label);
+void Mips64Assembler::Jialc(Mips64Label* label, GpuRegister indirect_reg) {
+  Call(label, indirect_reg);
 }
 
-void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Label* label) {
-  Bltc(rs, rt, 2);
-  B(label);
+void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondLT, rs, rt);
 }
 
-void Mips64Assembler::Bgezc(GpuRegister rt, Label* label) {
-  Bltzc(rt, 2);
-  B(label);
+void Mips64Assembler::Bltzc(GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondLTZ, rt);
 }
 
-void Mips64Assembler::Blezc(GpuRegister rt, Label* label) {
-  Bgtzc(rt, 2);
-  B(label);
+void Mips64Assembler::Bgtzc(GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondGTZ, rt);
 }
 
-void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Label* label) {
-  Bgeuc(rs, rt, 2);
-  B(label);
+void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondGE, rs, rt);
 }
 
-void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Label* label) {
-  Bltuc(rs, rt, 2);
-  B(label);
+void Mips64Assembler::Bgezc(GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondGEZ, rt);
 }
 
-void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Label* label) {
-  Bnec(rs, rt, 2);
-  B(label);
+void Mips64Assembler::Blezc(GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondLEZ, rt);
 }
 
-void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Label* label) {
-  Beqc(rs, rt, 2);
-  B(label);
+void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondLTU, rs, rt);
 }
 
-void Mips64Assembler::Beqzc(GpuRegister rs, Label* label) {
-  Bnezc(rs, 2);
-  B(label);
+void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondGEU, rs, rt);
 }
 
-void Mips64Assembler::Bnezc(GpuRegister rs, Label* label) {
-  Beqzc(rs, 2);
-  B(label);
+void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondEQ, rs, rt);
+}
+
+void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondNE, rs, rt);
+}
+
+void Mips64Assembler::Beqzc(GpuRegister rs, Mips64Label* label) {
+  Bcond(label, kCondEQZ, rs);
+}
+
+void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label) {
+  Bcond(label, kCondNEZ, rs);
+}
+
+void Mips64Assembler::Bc1eqz(FpuRegister ft, Mips64Label* label) {
+  Bcond(label, kCondF, static_cast<GpuRegister>(ft), ZERO);
+}
+
+void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label) {
+  Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO);
 }
 
 void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base,
@@ -1256,6 +1909,7 @@
                                  const std::vector<ManagedRegister>& callee_save_regs,
                                  const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  DCHECK(!overwriting_);
 
   // Increase frame to required size.
   IncreaseFrameSize(frame_size);
@@ -1298,6 +1952,7 @@
 void Mips64Assembler::RemoveFrame(size_t frame_size,
                                   const std::vector<ManagedRegister>& callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  DCHECK(!overwriting_);
   cfi_.RememberState();
 
   // Pop callee saves and return address
@@ -1316,6 +1971,7 @@
 
   // Then jump to the return address.
   Jr(RA);
+  Nop();
 
   // The CFI should be restored for any code that follows the exit block.
   cfi_.RestoreState();
@@ -1324,12 +1980,14 @@
 
 void Mips64Assembler::IncreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kFramePointerSize);
+  DCHECK(!overwriting_);
   Daddiu64(SP, SP, static_cast<int32_t>(-adjust));
   cfi_.AdjustCFAOffset(adjust);
 }
 
 void Mips64Assembler::DecreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kFramePointerSize);
+  DCHECK(!overwriting_);
   Daddiu64(SP, SP, static_cast<int32_t>(adjust));
   cfi_.AdjustCFAOffset(-adjust);
 }
@@ -1379,17 +2037,7 @@
   StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
 }
 
-void Mips64Assembler::StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm,
-                                               ManagedRegister mscratch) {
-  Mips64ManagedRegister scratch = mscratch.AsMips64();
-  CHECK(scratch.IsGpuRegister()) << scratch;
-  // TODO: it's unclear whether 32 or 64 bits need to be stored (Arm64 and x86/x64 disagree?).
-  // Is this function even referenced anywhere else in the code?
-  LoadConst32(scratch.AsGpuRegister(), imm);
-  StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, dest.Int32Value());
-}
-
-void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs,
+void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs,
                                                  FrameOffset fr_offs,
                                                  ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
@@ -1398,7 +2046,7 @@
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value());
 }
 
-void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs) {
+void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) {
   StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value());
 }
 
@@ -1415,7 +2063,9 @@
   return EmitLoad(mdest, SP, src.Int32Value(), size);
 }
 
-void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) {
+void Mips64Assembler::LoadFromThread64(ManagedRegister mdest,
+                                       ThreadOffset<kMipsDoublewordSize> src,
+                                       size_t size) {
   return EmitLoad(mdest, S1, src.Int32Value(), size);
 }
 
@@ -1449,18 +2099,20 @@
 }
 
 void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest,
-                                             ThreadOffset<8> offs) {
+                                             ThreadOffset<kMipsDoublewordSize> offs) {
   Mips64ManagedRegister dest = mdest.AsMips64();
   CHECK(dest.IsGpuRegister());
   LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value());
 }
 
-void Mips64Assembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no sign extension necessary for mips";
+void Mips64Assembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED,
+                                 size_t size ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "No sign extension necessary for MIPS64";
 }
 
-void Mips64Assembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no zero extension necessary for mips";
+void Mips64Assembler::ZeroExtend(ManagedRegister mreg ATTRIBUTE_UNUSED,
+                                 size_t size ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "No zero extension necessary for MIPS64";
 }
 
 void Mips64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
@@ -1492,7 +2144,7 @@
 }
 
 void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                             ThreadOffset<8> thr_offs,
+                                             ThreadOffset<kMipsDoublewordSize> thr_offs,
                                              ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
@@ -1500,7 +2152,7 @@
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
 }
 
-void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs,
+void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs,
                                            FrameOffset fr_offs,
                                            ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
@@ -1561,9 +2213,12 @@
   }
 }
 
-void Mips64Assembler::Copy(FrameOffset /*dest*/, FrameOffset /*src_base*/, Offset /*src_offset*/,
-                         ManagedRegister /*mscratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED,
+                           FrameOffset src_base ATTRIBUTE_UNUSED,
+                           Offset src_offset ATTRIBUTE_UNUSED,
+                           ManagedRegister mscratch ATTRIBUTE_UNUSED,
+                           size_t size ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
 
 void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset,
@@ -1584,15 +2239,18 @@
   }
 }
 
-void Mips64Assembler::Copy(FrameOffset /*dest*/, Offset /*dest_offset*/, FrameOffset /*src*/, Offset
-/*src_offset*/,
-                         ManagedRegister /*mscratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED,
+                           Offset dest_offset ATTRIBUTE_UNUSED,
+                           FrameOffset src ATTRIBUTE_UNUSED,
+                           Offset src_offset ATTRIBUTE_UNUSED,
+                           ManagedRegister mscratch ATTRIBUTE_UNUSED,
+                           size_t size ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
 
-void Mips64Assembler::MemoryBarrier(ManagedRegister) {
+void Mips64Assembler::MemoryBarrier(ManagedRegister mreg ATTRIBUTE_UNUSED) {
   // TODO: sync?
-  UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+  UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
 
 void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
@@ -1604,7 +2262,7 @@
   CHECK(in_reg.IsNoRegister() || in_reg.IsGpuRegister()) << in_reg;
   CHECK(out_reg.IsGpuRegister()) << out_reg;
   if (null_allowed) {
-    Label null_arg;
+    Mips64Label null_arg;
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
     // the address in the handle scope holding the reference.
     // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
@@ -1631,7 +2289,7 @@
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
   if (null_allowed) {
-    Label null_arg;
+    Mips64Label null_arg;
     LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP,
                    handle_scope_offset.Int32Value());
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
@@ -1653,7 +2311,7 @@
   Mips64ManagedRegister in_reg = min_reg.AsMips64();
   CHECK(out_reg.IsGpuRegister()) << out_reg;
   CHECK(in_reg.IsGpuRegister()) << in_reg;
-  Label null_arg;
+  Mips64Label null_arg;
   if (!out_reg.Equals(in_reg)) {
     LoadConst32(out_reg.AsGpuRegister(), 0);
   }
@@ -1663,11 +2321,13 @@
   Bind(&null_arg);
 }
 
-void Mips64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+void Mips64Assembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED,
+                                   bool could_be_null ATTRIBUTE_UNUSED) {
   // TODO: not validating references
 }
 
-void Mips64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+void Mips64Assembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED,
+                                   bool could_be_null ATTRIBUTE_UNUSED) {
   // TODO: not validating references
 }
 
@@ -1679,6 +2339,7 @@
   LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
                  base.AsGpuRegister(), offset.Int32Value());
   Jalr(scratch.AsGpuRegister());
+  Nop();
   // TODO: place reference map on call
 }
 
@@ -1691,11 +2352,13 @@
   LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
                  scratch.AsGpuRegister(), offset.Int32Value());
   Jalr(scratch.AsGpuRegister());
+  Nop();
   // TODO: place reference map on call
 }
 
-void Mips64Assembler::CallFromThread64(ThreadOffset<8> /*offset*/, ManagedRegister /*mscratch*/) {
-  UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+void Mips64Assembler::CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset ATTRIBUTE_UNUSED,
+                                       ManagedRegister mscratch ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
 
 void Mips64Assembler::GetCurrentThread(ManagedRegister tr) {
@@ -1703,37 +2366,39 @@
 }
 
 void Mips64Assembler::GetCurrentThread(FrameOffset offset,
-                                       ManagedRegister /*mscratch*/) {
+                                       ManagedRegister mscratch ATTRIBUTE_UNUSED) {
   StoreToOffset(kStoreDoubleword, S1, SP, offset.Int32Value());
 }
 
 void Mips64Assembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
-  Mips64ExceptionSlowPath* slow = new Mips64ExceptionSlowPath(scratch, stack_adjust);
-  buffer_.EnqueueSlowPath(slow);
-  LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
-                 S1, Thread::ExceptionOffset<8>().Int32Value());
-  Bnezc(scratch.AsGpuRegister(), slow->Entry());
+  exception_blocks_.emplace_back(scratch, stack_adjust);
+  LoadFromOffset(kLoadDoubleword,
+                 scratch.AsGpuRegister(),
+                 S1,
+                 Thread::ExceptionOffset<kMipsDoublewordSize>().Int32Value());
+  Bnezc(scratch.AsGpuRegister(), exception_blocks_.back().Entry());
 }
 
-void Mips64ExceptionSlowPath::Emit(Assembler* sasm) {
-  Mips64Assembler* sp_asm = down_cast<Mips64Assembler*>(sasm);
-#define __ sp_asm->
-  __ Bind(&entry_);
-  if (stack_adjust_ != 0) {  // Fix up the frame.
-    __ DecreaseFrameSize(stack_adjust_);
+void Mips64Assembler::EmitExceptionPoll(Mips64ExceptionSlowPath* exception) {
+  Bind(exception->Entry());
+  if (exception->stack_adjust_ != 0) {  // Fix up the frame.
+    DecreaseFrameSize(exception->stack_adjust_);
   }
-  // Pass exception object as argument
-  // Don't care about preserving A0 as this call won't return
-  __ Move(A0, scratch_.AsGpuRegister());
+  // Pass exception object as argument.
+  // Don't care about preserving A0 as this call won't return.
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
+  Move(A0, exception->scratch_.AsGpuRegister());
   // Set up call to Thread::Current()->pDeliverException
-  __ LoadFromOffset(kLoadDoubleword, T9, S1,
-                    QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value());
-  // TODO: check T9 usage
-  __ Jr(T9);
+  LoadFromOffset(kLoadDoubleword,
+                 T9,
+                 S1,
+                 QUICK_ENTRYPOINT_OFFSET(kMipsDoublewordSize, pDeliverException).Int32Value());
+  Jr(T9);
+  Nop();
+
   // Call never returns
-  __ Break();
-#undef __
+  Break();
 }
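
// Taken together, ExceptionPoll() and EmitExceptionPoll() produce roughly this
// shape (names and offsets illustrative; s1 is the Thread register, and a frame
// fix-up is inserted when stack_adjust_ != 0):
//
//   ld     scratch, exception_offset(s1)   # Thread::Current()->exception_
//   bnezc  scratch, 1f
//   ...                                    # normal path continues
// 1:                                       # emitted during FinalizeCode()
//   move   a0, scratch                     # exception object as the argument
//   ld     t9, pDeliverException(s1)
//   jr     t9
//   nop
//   break                                  # never reached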
 
 }  // namespace mips64
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 42962bc..883f013 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -17,18 +17,22 @@
 #ifndef ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
 #define ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
 
+#include <utility>
 #include <vector>
 
 #include "base/macros.h"
 #include "constants_mips64.h"
 #include "globals.h"
 #include "managed_register_mips64.h"
-#include "utils/assembler.h"
 #include "offsets.h"
+#include "utils/assembler.h"
+#include "utils/label.h"
 
 namespace art {
 namespace mips64 {
 
+static constexpr size_t kMipsDoublewordSize = 8;
+
 enum LoadOperandType {
   kLoadSignedByte,
   kLoadUnsignedByte,
@@ -60,10 +64,57 @@
   kPositiveZero      = 0x200,
 };
 
+class Mips64Label : public Label {
+ public:
+  Mips64Label() : prev_branch_id_plus_one_(0) {}
+
+  Mips64Label(Mips64Label&& src)
+      : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {}
+
+ private:
+  uint32_t prev_branch_id_plus_one_;  // To get distance from preceding branch, if any.
+
+  friend class Mips64Assembler;
+  DISALLOW_COPY_AND_ASSIGN(Mips64Label);
+};
+
+// Slowpath entered when Thread::Current()->exception_ is non-null.
+class Mips64ExceptionSlowPath {
+ public:
+  explicit Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust)
+      : scratch_(scratch), stack_adjust_(stack_adjust) {}
+
+  Mips64ExceptionSlowPath(Mips64ExceptionSlowPath&& src)
+      : scratch_(src.scratch_),
+        stack_adjust_(src.stack_adjust_),
+        exception_entry_(std::move(src.exception_entry_)) {}
+
+ private:
+  Mips64Label* Entry() { return &exception_entry_; }
+  const Mips64ManagedRegister scratch_;
+  const size_t stack_adjust_;
+  Mips64Label exception_entry_;
+
+  friend class Mips64Assembler;
+  DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath);
+};
+
 class Mips64Assembler FINAL : public Assembler {
  public:
-  Mips64Assembler() {}
-  virtual ~Mips64Assembler() {}
+  Mips64Assembler()
+      : overwriting_(false),
+        overwrite_location_(0),
+        last_position_adjustment_(0),
+        last_old_position_(0),
+        last_branch_id_(0) {
+    cfi().DelayEmittingAdvancePCs();
+  }
+
+  virtual ~Mips64Assembler() {
+    for (auto& branch : branches_) {
+      CHECK(branch.IsResolved());
+    }
+  }
 
   // Emit Machine Instructions.
   void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
@@ -156,14 +207,12 @@
   void Dclz(GpuRegister rd, GpuRegister rs);
   void Dclo(GpuRegister rd, GpuRegister rs);
 
-  void Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16);
-  void Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16);
-  void J(uint32_t addr26);
-  void Jal(uint32_t addr26);
   void Jalr(GpuRegister rd, GpuRegister rs);
   void Jalr(GpuRegister rs);
   void Jr(GpuRegister rs);
   void Auipc(GpuRegister rs, uint16_t imm16);
+  void Addiupc(GpuRegister rs, uint32_t imm19);
+  void Bc(uint32_t imm26);
   void Jic(GpuRegister rt, uint16_t imm16);
   void Jialc(GpuRegister rt, uint16_t imm16);
   void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
@@ -178,6 +227,8 @@
   void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
   void Beqzc(GpuRegister rs, uint32_t imm21);
   void Bnezc(GpuRegister rs, uint32_t imm21);
+  void Bc1eqz(FpuRegister ft, uint16_t imm16);
+  void Bc1nez(FpuRegister ft, uint16_t imm16);
 
   void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
@@ -217,6 +268,26 @@
   void MinD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void MaxS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void MaxD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
 
   void Cvtsw(FpuRegister fd, FpuRegister fs);
   void Cvtdw(FpuRegister fd, FpuRegister fs);
@@ -240,32 +311,36 @@
   void Clear(GpuRegister rd);
   void Not(GpuRegister rd, GpuRegister rs);
 
-  // Higher level composite instructions
+  // Higher level composite instructions.
   void LoadConst32(GpuRegister rd, int32_t value);
   void LoadConst64(GpuRegister rd, int64_t value);  // MIPS64
 
-  void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp = AT);
   void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT);  // MIPS64
 
-  void Bind(Label* label) OVERRIDE;
-  void Jump(Label* label) OVERRIDE {
-    B(label);
+  void Bind(Label* label) OVERRIDE {
+    Bind(down_cast<Mips64Label*>(label));
   }
-  void B(Label* label);
-  void Jalr(Label* label, GpuRegister indirect_reg = RA);
-  // TODO: implement common for R6 and non-R6 interface for conditional branches?
-  void Bltc(GpuRegister rs, GpuRegister rt, Label* label);
-  void Bltzc(GpuRegister rt, Label* label);
-  void Bgtzc(GpuRegister rt, Label* label);
-  void Bgec(GpuRegister rs, GpuRegister rt, Label* label);
-  void Bgezc(GpuRegister rt, Label* label);
-  void Blezc(GpuRegister rt, Label* label);
-  void Bltuc(GpuRegister rs, GpuRegister rt, Label* label);
-  void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label);
-  void Beqc(GpuRegister rs, GpuRegister rt, Label* label);
-  void Bnec(GpuRegister rs, GpuRegister rt, Label* label);
-  void Beqzc(GpuRegister rs, Label* label);
-  void Bnezc(GpuRegister rs, Label* label);
+  void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE {
+    UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS64";
+  }
+
+  void Bind(Mips64Label* label);
+  void Bc(Mips64Label* label);
+  void Jialc(Mips64Label* label, GpuRegister indirect_reg);
+  void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Bltzc(GpuRegister rt, Mips64Label* label);
+  void Bgtzc(GpuRegister rt, Mips64Label* label);
+  void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Bgezc(GpuRegister rt, Mips64Label* label);
+  void Blezc(GpuRegister rt, Mips64Label* label);
+  void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Beqzc(GpuRegister rs, Mips64Label* label);
+  void Bnezc(GpuRegister rs, Mips64Label* label);
+  void Bc1eqz(FpuRegister ft, Mips64Label* label);
+  void Bc1nez(FpuRegister ft, Mips64Label* label);
 
   void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
   void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
@@ -277,43 +352,42 @@
   void Emit(uint32_t value);
 
   //
-  // Overridden common assembler high-level functionality
+  // Overridden common assembler high-level functionality.
   //
 
-  // Emit code that will create an activation on the stack
+  // Emit code that will create an activation on the stack.
   void BuildFrame(size_t frame_size, ManagedRegister method_reg,
                   const std::vector<ManagedRegister>& callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
-  // Emit code that will remove an activation from the stack
+  // Emit code that will remove an activation from the stack.
   void RemoveFrame(size_t frame_size,
                    const std::vector<ManagedRegister>& callee_save_regs) OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
   void DecreaseFrameSize(size_t adjust) OVERRIDE;
 
-  // Store routines
+  // Store routines.
   void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE;
   void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
   void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
 
-  void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm,
-                                ManagedRegister mscratch) OVERRIDE;
-
-  void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
+  void StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs,
                                   ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE;
+  void StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) OVERRIDE;
 
   void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
                      ManagedRegister mscratch) OVERRIDE;
 
-  // Load routines
+  // Load routines.
   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
 
-  void LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) OVERRIDE;
+  void LoadFromThread64(ManagedRegister mdest,
+                        ThreadOffset<kMipsDoublewordSize> src,
+                        size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
@@ -322,15 +396,16 @@
 
   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
 
-  void LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset<8> offs) OVERRIDE;
+  void LoadRawPtrFromThread64(ManagedRegister mdest,
+                              ThreadOffset<kMipsDoublewordSize> offs) OVERRIDE;
 
-  // Copying routines
+  // Copying routines.
   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
 
-  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs,
+  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMipsDoublewordSize> thr_offs,
                               ManagedRegister mscratch) OVERRIDE;
 
-  void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
+  void CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs,
                             ManagedRegister mscratch) OVERRIDE;
 
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
@@ -354,13 +429,13 @@
 
   void MemoryBarrier(ManagedRegister) OVERRIDE;
 
-  // Sign extension
+  // Sign extension.
   void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
 
-  // Zero extension
+  // Zero extension.
   void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
 
-  // Exploit fast access in managed code to Thread::Current()
+  // Exploit fast access in managed code to Thread::Current().
   void GetCurrentThread(ManagedRegister tr) OVERRIDE;
   void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE;
 
@@ -376,7 +451,7 @@
   void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister
                               mscratch, bool null_allowed) OVERRIDE;
 
-  // src holds a handle scope entry (Object**) load this into dst
+  // src holds a handle scope entry (Object**); load this into dst.
   void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
 
   // Heap::VerifyObject on src. In some cases (such as a reference to this) we
@@ -384,39 +459,257 @@
   void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
   void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
 
-  // Call to address held at [base+offset]
+  // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
-  void CallFromThread64(ThreadOffset<8> offset, ManagedRegister mscratch) OVERRIDE;
+  void CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset,
+                        ManagedRegister mscratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to an ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE;
 
+  // Emit slow paths queued during assembly and promote short branches to long if needed.
+  void FinalizeCode() OVERRIDE;
+
+  // Emit branches and finalize all instructions.
+  void FinalizeInstructions(const MemoryRegion& region);
+
+  // Returns the current (always up-to-date) location of a label. CodeGeneratorMIPS64
+  // must use this instead of Mips64Label::GetPosition().
+  uint32_t GetLabelLocation(Mips64Label* label) const;
+
+  // Get the final position of a label after local fixup based on the old position
+  // recorded before FinalizeCode().
+  uint32_t GetAdjustedPosition(uint32_t old_position);
+
+  enum BranchCondition {
+    kCondLT,
+    kCondGE,
+    kCondLE,
+    kCondGT,
+    kCondLTZ,
+    kCondGEZ,
+    kCondLEZ,
+    kCondGTZ,
+    kCondEQ,
+    kCondNE,
+    kCondEQZ,
+    kCondNEZ,
+    kCondLTU,
+    kCondGEU,
+    kCondF,    // Floating-point predicate false.
+    kCondT,    // Floating-point predicate true.
+    kUncond,
+  };
+  friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs);
+
  private:
+  class Branch {
+   public:
+    enum Type {
+      // Short branches.
+      kUncondBranch,
+      kCondBranch,
+      kCall,
+      // Long branches.
+      kLongUncondBranch,
+      kLongCondBranch,
+      kLongCall,
+    };
+
+    // Bit sizes of offsets defined as enums to minimize the chance of typos.
+    enum OffsetBits {
+      kOffset16 = 16,
+      kOffset18 = 18,
+      kOffset21 = 21,
+      kOffset23 = 23,
+      kOffset28 = 28,
+      kOffset32 = 32,
+    };
+
+    static constexpr uint32_t kUnresolved = 0xffffffff;  // Unresolved target_.
+    static constexpr int32_t kMaxBranchLength = 32;
+    static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t);
+
+    struct BranchInfo {
+      // Branch length as a number of 4-byte-long instructions.
+      uint32_t length;
+      // Ordinal number (0-based) of the first (or the only) instruction that contains the branch's
+      // PC-relative offset (or its most significant 16-bit half, which goes first).
+      uint32_t instr_offset;
+      // Different MIPS instructions with PC-relative offsets apply said offsets to slightly
+      // different origins, e.g. to PC or PC+4. Encode the origin distance (as a number of 4-byte
+      // instructions) from the instruction containing the offset.
+      uint32_t pc_org;
+      // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch is
+      // an exception: use kOffset23 for beqzc/bnezc).
+      OffsetBits offset_size;
+      // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift
+      // count.
+      int offset_shift;
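+      //
+      // Putting these fields together, the encoded offset is roughly
+      //   (target - (location + (instr_offset + pc_org) * 4)) >> offset_shift
+      // (a sketch only; the exact computation lives in Branch::GetOffset()).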
+    };
+    static const BranchInfo branch_info_[/* Type */];
+
+    // Unconditional branch.
+    Branch(uint32_t location, uint32_t target);
+    // Conditional branch.
+    Branch(uint32_t location,
+           uint32_t target,
+           BranchCondition condition,
+           GpuRegister lhs_reg,
+           GpuRegister rhs_reg = ZERO);
+    // Call (branch and link) that stores the target address in a given register (e.g. T9).
+    Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg);
+
+    // Some conditional branches with lhs = rhs are effectively NOPs, while some
+    // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs.
+    // So, we need a way to identify such branches in order to emit no instructions for them
+    // or change them to unconditional.
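+    // For example, beqc with lhs == rhs is always taken and thus effectively
+    // unconditional, while bltc with lhs == rhs can never be taken and is
+    // effectively a NOP.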
+    static bool IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs);
+    static bool IsUncond(BranchCondition condition, GpuRegister lhs, GpuRegister rhs);
+
+    static BranchCondition OppositeCondition(BranchCondition cond);
+
+    Type GetType() const;
+    BranchCondition GetCondition() const;
+    GpuRegister GetLeftRegister() const;
+    GpuRegister GetRightRegister() const;
+    uint32_t GetTarget() const;
+    uint32_t GetLocation() const;
+    uint32_t GetOldLocation() const;
+    uint32_t GetLength() const;
+    uint32_t GetOldLength() const;
+    uint32_t GetSize() const;
+    uint32_t GetOldSize() const;
+    uint32_t GetEndLocation() const;
+    uint32_t GetOldEndLocation() const;
+    bool IsLong() const;
+    bool IsResolved() const;
+
+    // Returns the bit size of the signed offset that the branch instruction can handle.
+    OffsetBits GetOffsetSize() const;
+
+    // Calculates the distance between two byte locations in the assembler buffer and
+    // returns the number of bits needed to represent the distance as a signed integer.
+    //
+    // Branch instructions have signed offsets of 16, 19 (addiupc), 21 (beqzc/bnezc),
+    // and 26 (bc) bits, which are additionally shifted left 2 positions at run time.
+    //
+    // Composite branches (made of several instructions) with longer reach have 32-bit
+    // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first).
+    // The composite branches cover the range of PC + ~+/-2GB. The range is not end-to-end,
+    // however. Consider the following implementation of a long unconditional branch, for
+    // example:
+    //
+    //   auipc at, offset_31_16  // at = pc + sign_extend(offset_31_16) << 16
+    //   jic   at, offset_15_0   // pc = at + sign_extend(offset_15_0)
+    //
+    // Both of the above instructions take 16-bit signed offsets as immediate operands.
+    // When bit 15 of offset_15_0 is 1, it effectively causes subtraction of 0x10000
+    // due to sign extension. This must be compensated for by incrementing offset_31_16
+    // by 1. offset_31_16 can only be incremented by 1 if it's not 0x7FFF. If it is
+    // 0x7FFF, adding 1 will overflow the positive offset into the negative range.
+    // Therefore, the long branch range is something like from PC - 0x80000000 to
+    // PC + 0x7FFF7FFF, IOW, shorter by 32KB on one side.
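+    //
+    // As a worked example, a forward branch by 0x12348000 bytes would be encoded
+    // with offset_15_0 = 0x8000 and offset_31_16 = 0x1234 + 1 = 0x1235 (note the
+    // compensation), since (0x1235 << 16) + sign_extend(0x8000) = 0x12348000.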
+    //
+    // The returned values are therefore: 18, 21, 23, 28 and 32. There's also a special
+    // case with the addiu instruction and a 16-bit offset.
+    static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target);
+
+    // Resolve a branch when the target is known.
+    void Resolve(uint32_t target);
+
+    // Relocate a branch by a given delta if needed due to expansion of this or another
+    // branch at a given location by this delta (just changes location_ and target_).
+    void Relocate(uint32_t expand_location, uint32_t delta);
+
+    // If the branch is short, changes its type to long.
+    void PromoteToLong();
+
+    // If necessary, updates the type by promoting a short branch to a long branch
+    // based on the branch location and target. Returns the amount (in bytes) by
+    // which the branch size has increased.
+    // max_short_distance caps the maximum distance between location_ and target_
+    // that is allowed for short branches. This is for debugging/testing purposes.
+    // max_short_distance = 0 forces all short branches to become long.
+    // Use the implicit default argument when not debugging/testing.
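+    // E.g. normal assembly uses the default, while a test may pass 0 to force
+    // every short branch to be emitted in its long form.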
+    uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max());
+
+    // Returns the location of the instruction(s) containing the offset.
+    uint32_t GetOffsetLocation() const;
+
+    // Calculates and returns the offset ready for encoding in the branch instruction(s).
+    uint32_t GetOffset() const;
+
+   private:
+    // Completes branch construction by determining and recording its type.
+    void InitializeType(bool is_call);
+    // Helper for the above.
+    void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);
+
+    uint32_t old_location_;      // Offset into assembler buffer in bytes.
+    uint32_t location_;          // Offset into assembler buffer in bytes.
+    uint32_t target_;            // Offset into assembler buffer in bytes.
+
+    GpuRegister lhs_reg_;        // Left-hand side register in conditional branches or
+                                 // indirect call register.
+    GpuRegister rhs_reg_;        // Right-hand side register in conditional branches.
+    BranchCondition condition_;  // Condition for conditional branches.
+
+    Type type_;                  // Current type of the branch.
+    Type old_type_;              // Initial type of the branch.
+  };
+  friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs);
+  friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs);
+
   void EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, int shamt, int funct);
   void EmitRsd(int opcode, GpuRegister rs, GpuRegister rd, int shamt, int funct);
   void EmitRtd(int opcode, GpuRegister rt, GpuRegister rd, int shamt, int funct);
   void EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm);
   void EmitI21(int opcode, GpuRegister rs, uint32_t imm21);
-  void EmitJ(int opcode, uint32_t addr26);
+  void EmitI26(int opcode, uint32_t imm26);
   void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct);
   void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm);
+  void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21);
+
+  void Buncond(Mips64Label* label);
+  void Bcond(Mips64Label* label,
+             BranchCondition condition,
+             GpuRegister lhs,
+             GpuRegister rhs = ZERO);
+  void Call(Mips64Label* label, GpuRegister indirect_reg);
+  void FinalizeLabeledBranch(Mips64Label* label);
+
+  Branch* GetBranch(uint32_t branch_id);
+  const Branch* GetBranch(uint32_t branch_id) const;
+
+  void PromoteBranches();
+  void EmitBranch(Branch* branch);
+  void EmitBranches();
+  void PatchCFI();
+
+  // Emits exception block.
+  void EmitExceptionPoll(Mips64ExceptionSlowPath* exception);
+
+  // List of exception blocks to generate at the end of the code cache.
+  std::vector<Mips64ExceptionSlowPath> exception_blocks_;
+
+  std::vector<Branch> branches_;
+
+  // Whether we are appending instructions at the end of the buffer or overwriting existing ones.
+  bool overwriting_;
+  // The current overwrite location.
+  uint32_t overwrite_location_;
+
+  // Data for GetAdjustedPosition(); see the description there.
+  uint32_t last_position_adjustment_;
+  uint32_t last_old_position_;
+  uint32_t last_branch_id_;
 
   DISALLOW_COPY_AND_ASSIGN(Mips64Assembler);
 };
 
-// Slowpath entered when Thread::Current()->_exception is non-null
-class Mips64ExceptionSlowPath FINAL : public SlowPath {
- public:
-  Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust)
-      : scratch_(scratch), stack_adjust_(stack_adjust) {}
-  virtual void Emit(Assembler *sp_asm) OVERRIDE;
- private:
-  const Mips64ManagedRegister scratch_;
-  const size_t stack_adjust_;
-};
-
 }  // namespace mips64
 }  // namespace art
 
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 4413906..bac4375 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -24,6 +24,8 @@
 #include "base/stl_util.h"
 #include "utils/assembler_test.h"
 
+#define __ GetAssembler()->
+
 namespace art {
 
 struct MIPS64CpuRegisterCompare {
@@ -48,8 +50,26 @@
     return "mips64";
   }
 
+  std::string GetAssemblerCmdName() OVERRIDE {
+    // We assemble and link for MIPS64R6. See GetAssemblerParameters() for details.
+    return "gcc";
+  }
+
   std::string GetAssemblerParameters() OVERRIDE {
-    return " --no-warn -march=mips64r6";
+    // We assemble and link for MIPS64R6. The reason is that object files produced for MIPS64R6
+    // (and MIPS32R6) with the GNU assembler don't have correct final offsets in PC-relative
+    // branches in the .text section and so they require a relocation pass (there's a relocation
+    // section, .rela.text, that has the needed info to fix up the branches).
+    return " -march=mips64r6 -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib";
+  }
+
+  void Pad(std::vector<uint8_t>& data) OVERRIDE {
+    // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple
+    // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't
+    // pad, so, in order for two assembler outputs to match, we need to match the padding as well.
+    // NOP is encoded as four zero bytes on MIPS.
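+    // E.g. 24 bytes of assembler output are padded with two NOPs (8 zero bytes)
+    // to reach 32 bytes.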
+    size_t pad_size = RoundUp(data.size(), 16u) - data.size();
+    data.insert(data.end(), pad_size, 0);
   }
 
   std::string GetDisassembleParameters() OVERRIDE {
@@ -182,6 +202,71 @@
     return secondary_register_names_[reg];
   }
 
+  std::string RepeatInsn(size_t count, const std::string& insn) {
+    std::string result;
+    for (; count != 0u; --count) {
+      result += insn;
+    }
+    return result;
+  }
+
+  void BranchCondOneRegHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
+                                                                 mips64::Mips64Label*),
+                              std::string instr_name) {
+    mips64::Mips64Label label;
+    (Base::GetAssembler()->*f)(mips64::A0, &label);
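+    // The filler-instruction counts below keep both the forward and the backward
+    // branch well within short-branch range, so no promotion to a long branch is
+    // expected in this helper.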
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    (Base::GetAssembler()->*f)(mips64::A1, &label);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $a0, 1f\n"
+        "nop\n" +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $a1, 1b\n"
+        "nop\n";
+    DriverStr(expected, instr_name);
+  }
+
+  void BranchCondTwoRegsHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
+                                                                  mips64::GpuRegister,
+                                                                  mips64::Mips64Label*),
+                               std::string instr_name) {
+    mips64::Mips64Label label;
+    (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    (Base::GetAssembler()->*f)(mips64::A2, mips64::A3, &label);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $a0, $a1, 1f\n"
+        "nop\n" +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $a2, $a3, 1b\n"
+        "nop\n";
+    DriverStr(expected, instr_name);
+  }
+
  private:
   std::vector<mips64::GpuRegister*> registers_;
   std::map<mips64::GpuRegister, std::string, MIPS64CpuRegisterCompare> secondary_register_names_;
@@ -194,7 +279,6 @@
   EXPECT_TRUE(CheckTools());
 }
 
-
 ///////////////////
 // FP Operations //
 ///////////////////
@@ -319,6 +403,106 @@
   DriverStr(RepeatFFF(&mips64::Mips64Assembler::MaxD, "max.d ${reg1}, ${reg2}, ${reg3}"), "max.d");
 }
 
+TEST_F(AssemblerMIPS64Test, CmpUnS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnS, "cmp.un.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.un.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpEqS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqS, "cmp.eq.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.eq.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUeqS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqS, "cmp.ueq.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ueq.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLtS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtS, "cmp.lt.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.lt.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUltS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltS, "cmp.ult.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ult.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLeS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeS, "cmp.le.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.le.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUleS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleS, "cmp.ule.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ule.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpOrS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrS, "cmp.or.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.or.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUneS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneS, "cmp.une.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.une.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpNeS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeS, "cmp.ne.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ne.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUnD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnD, "cmp.un.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.un.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpEqD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqD, "cmp.eq.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.eq.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUeqD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqD, "cmp.ueq.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ueq.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLtD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtD, "cmp.lt.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.lt.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUltD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltD, "cmp.ult.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ult.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLeD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeD, "cmp.le.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.le.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUleD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleD, "cmp.ule.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ule.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpOrD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrD, "cmp.or.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.or.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUneD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneD, "cmp.une.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.une.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpNeD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeD, "cmp.ne.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ne.d");
+}
+
 TEST_F(AssemblerMIPS64Test, CvtDL) {
   DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "cvt.d.l");
 }
@@ -348,7 +532,255 @@
 ////////////////
 
 TEST_F(AssemblerMIPS64Test, Jalr) {
-  DriverStr(RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr");
+  DriverStr(".set noreorder\n" +
+            RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr");
+}
+
+TEST_F(AssemblerMIPS64Test, Jialc) {
+  mips64::Mips64Label label1, label2;
+  __ Jialc(&label1, mips64::T9);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label1);
+  __ Jialc(&label2, mips64::T9);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label2);
+  __ Jialc(&label1, mips64::T9);
+
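+  // As the expected output below shows, a short label-based Jialc expands to
+  // lapc (a PC-relative address load) into the indirect register followed by
+  // jialc, so the callee address ends up in $t9 as the MIPS PIC calling
+  // convention expects.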
+  std::string expected =
+      ".set noreorder\n"
+      "lapc $t9, 1f\n"
+      "jialc $t9, 0\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      "lapc $t9, 2f\n"
+      "jialc $t9, 0\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      "lapc $t9, 1b\n"
+      "jialc $t9, 0\n";
+  DriverStr(expected, "Jialc");
+}
+
+TEST_F(AssemblerMIPS64Test, LongJialc) {
+  mips64::Mips64Label label1, label2;
+  __ Jialc(&label1, mips64::T9);
+  constexpr uint32_t kAdduCount1 = (1u << 18) + 1;
+  for (uint32_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label1);
+  __ Jialc(&label2, mips64::T9);
+  constexpr uint32_t kAdduCount2 = (1u << 18) + 1;
+  for (uint32_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label2);
+  __ Jialc(&label1, mips64::T9);
+
+  uint32_t offset_forward1 = 3 + kAdduCount1;  // 3: account for auipc, daddiu and jialc.
+  offset_forward1 <<= 2;
+  offset_forward1 += (offset_forward1 & 0x8000) << 1;  // Account for sign extension in daddiu.
+
+  uint32_t offset_forward2 = 3 + kAdduCount2;  // 3: account for auipc, daddiu and jialc.
+  offset_forward2 <<= 2;
+  offset_forward2 += (offset_forward2 & 0x8000) << 1;  // Account for sign extension in daddiu.
+
+  uint32_t offset_back = -(3 + kAdduCount2);  // 3: account for auipc, daddiu and jialc.
+  offset_back <<= 2;
+  offset_back += (offset_back & 0x8000) << 1;  // Account for sign extension in daddiu.
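+  // All three offsets are byte offsets (hence the << 2), and the final line of
+  // each computation compensates for the sign extension of the low 16 bits in
+  // daddiu, mirroring the long-branch discussion in assembler_mips64.h.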
+
+  std::ostringstream oss;
+  oss <<
+      ".set noreorder\n"
+      "auipc $t9, 0x" << std::hex << High16Bits(offset_forward1) << "\n"
+      "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward1) << "\n"
+      "jialc $t9, 0\n" <<
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
+      "1:\n"
+      "auipc $t9, 0x" << std::hex << High16Bits(offset_forward2) << "\n"
+      "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward2) << "\n"
+      "jialc $t9, 0\n" <<
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
+      "2:\n"
+      "auipc $t9, 0x" << std::hex << High16Bits(offset_back) << "\n"
+      "daddiu $t9, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+      "jialc $t9, 0\n";
+  std::string expected = oss.str();
+  DriverStr(expected, "LongJialc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bc) {
+  mips64::Mips64Label label1, label2;
+  __ Bc(&label1);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label1);
+  __ Bc(&label2);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label2);
+  __ Bc(&label1);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bc 1f\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      "bc 2f\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      "bc 1b\n";
+  DriverStr(expected, "Bc");
+}
+
+TEST_F(AssemblerMIPS64Test, Beqzc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqzc, "Beqzc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bnezc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnezc, "Bnezc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bltzc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltzc, "Bltzc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgezc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgezc, "Bgezc");
+}
+
+TEST_F(AssemblerMIPS64Test, Blezc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Blezc, "Blezc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgtzc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtzc, "Bgtzc");
+}
+
+TEST_F(AssemblerMIPS64Test, Beqc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beqc, "Beqc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bnec) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bnec, "Bnec");
+}
+
+TEST_F(AssemblerMIPS64Test, Bltc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltc, "Bltc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgec) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgec, "Bgec");
+}
+
+TEST_F(AssemblerMIPS64Test, Bltuc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltuc, "Bltuc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgeuc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bc1eqz) {
+  mips64::Mips64Label label;
+  __ Bc1eqz(mips64::F0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bc1eqz(mips64::F31, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bc1eqz $f0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bc1eqz $f31, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bc1eqz");
+}
+
+TEST_F(AssemblerMIPS64Test, Bc1nez) {
+  mips64::Mips64Label label;
+  __ Bc1nez(mips64::F0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bc1nez(mips64::F31, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bc1nez $f0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bc1nez $f31, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bc1nez");
+}
+
+TEST_F(AssemblerMIPS64Test, LongBeqc) {
+  mips64::Mips64Label label;
+  __ Beqc(mips64::A0, mips64::A1, &label);
+  constexpr uint32_t kAdduCount1 = (1u << 15) + 1;
+  for (uint32_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label);
+  constexpr uint32_t kAdduCount2 = (1u << 15) + 1;
+  for (uint32_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Beqc(mips64::A2, mips64::A3, &label);
+
+  uint32_t offset_forward = 2 + kAdduCount1;  // 2: account for auipc and jic.
+  offset_forward <<= 2;
+  offset_forward += (offset_forward & 0x8000) << 1;  // Account for sign extension in jic.
+
+  uint32_t offset_back = -(kAdduCount2 + 1);  // 1: account for bnec.
+  offset_back <<= 2;
+  offset_back += (offset_back & 0x8000) << 1;  // Account for sign extension in jic.
+
+  std::ostringstream oss;
+  oss <<
+      ".set noreorder\n"
+      "bnec $a0, $a1, 1f\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
+      "1:\n" <<
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
+      "2:\n" <<
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
+      "bnec $a2, $a3, 3f\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+      "3:\n";
+  std::string expected = oss.str();
+  DriverStr(expected, "LongBeqc");
 }
 
 //////////
diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk
index e252765..77f8d6c 100644
--- a/dex2oat/Android.mk
+++ b/dex2oat/Android.mk
@@ -38,9 +38,10 @@
   dex2oat_target_arch := 32
 endif
 
-# We need to explcitly give the arch, as giving 'both' will make the
-# build-art-executable rule compile dex2oat for 64bits.
 ifeq ($(HOST_PREFER_32_BIT),true)
+  # We need to explicitly restrict the host arch to 32-bit only, as
+  # giving 'both' would make build-art-executable generate a build
+  # rule for a 64-bit dex2oat executable too.
   dex2oat_host_arch := 32
 else
   dex2oat_host_arch := both
@@ -56,19 +57,23 @@
 
 # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
 ifeq ($(ART_BUILD_HOST_NDEBUG),true)
-  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler libsigchain libziparchive-host,art/compiler,host,ndebug,$(dex2oat_host_arch)))
+  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler libsigchain libziparchive-host liblz4,art/compiler,host,ndebug,$(dex2oat_host_arch)))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
     $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart libart-compiler libart libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixl liblog libz \
-        libbacktrace libLLVMObject libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase,art/compiler,host,ndebug,$(dex2oat_host_arch),static))
+        libbacktrace libLLVMObject libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase liblz4,art/compiler,host,ndebug,$(dex2oat_host_arch),static))
   endif
 endif
 
 ifeq ($(ART_BUILD_HOST_DEBUG),true)
-  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libsigchain libziparchive-host,art/compiler,host,debug,$(dex2oat_host_arch)))
+  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libsigchain libziparchive-host liblz4,art/compiler,host,debug,$(dex2oat_host_arch)))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
     $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd libartd-compiler libartd libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixld liblog libz \
-        libbacktrace libLLVMObject libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase,art/compiler,host,debug,$(dex2oat_host_arch),static))
+        libbacktrace libLLVMObject libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase liblz4,art/compiler,host,debug,$(dex2oat_host_arch),static))
   endif
 endif
 
+# Clear locals now that they've served their purpose.
+dex2oat_target_arch :=
+dex2oat_host_arch :=
+
 endif
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 68cf6d9..21ce73c 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -55,8 +55,10 @@
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
 #include "elf_file.h"
 #include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
 #include "image_writer.h"
@@ -69,10 +71,10 @@
 #include "oat_writer.h"
 #include "os.h"
 #include "runtime.h"
+#include "runtime_options.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
 #include "utils.h"
-#include "vector_output_stream.h"
 #include "well_known_classes.h"
 #include "zip_archive.h"
 
@@ -207,6 +209,11 @@
   UsageError("  --image=<file.art>: specifies the output image filename.");
   UsageError("      Example: --image=/system/framework/boot.art");
   UsageError("");
+  UsageError("  --image-format=(uncompressed|lz4):");
+  UsageError("      Which format to store the image.");
+  UsageError("      Example: --image-format=lz4");
+  UsageError("      Default: uncompressed");
+  UsageError("");
   UsageError("  --image-classes=<classname-file>: specifies classes to include in an image.");
   UsageError("      Example: --image=frameworks/base/preloaded-classes");
   UsageError("");
@@ -214,7 +221,9 @@
   UsageError("      Example: --base=0x50000000");
   UsageError("");
   UsageError("  --boot-image=<file.art>: provide the image file for the boot class path.");
+  UsageError("      Do not include the arch as part of the name, it is added automatically.");
   UsageError("      Example: --boot-image=/system/framework/boot.art");
+  UsageError("               (specifies /system/framework/<arch>/boot.art as the image file)");
   UsageError("      Default: $ANDROID_ROOT/system/framework/boot.art");
   UsageError("");
   UsageError("  --android-root=<path>: used to locate libraries for portable linking.");
@@ -306,10 +315,13 @@
   UsageError("      stripped using standard command line tools such as strip or objcopy.");
   UsageError("      (enabled by default in debug builds, disabled by default otherwise)");
   UsageError("");
-  UsageError("  --debuggable: Produce debuggable code. Implies --generate-debug-info.");
-  UsageError("");
   UsageError("  --no-generate-debug-info: Do not generate debug information for native debugging.");
   UsageError("");
+  UsageError("  --debuggable: Produce code debuggable with Java debugger. Implies -g.");
+  UsageError("");
+  UsageError("  --native-debuggable: Produce code debuggable with native debugger (like LLDB).");
+  UsageError("      Implies --debuggable.");
+  UsageError("");
   UsageError("  --runtime-arg <argument>: used to specify various arguments for the runtime,");
   UsageError("      such as initial heap size, maximum heap size, and verbose output.");
   UsageError("      Use a separate --runtime-arg switch for each argument.");
@@ -487,6 +499,7 @@
       image_base_(0U),
       image_classes_zip_filename_(nullptr),
       image_classes_filename_(nullptr),
+      image_storage_mode_(ImageHeader::kStorageModeUncompressed),
       compiled_classes_zip_filename_(nullptr),
       compiled_classes_filename_(nullptr),
       compiled_methods_zip_filename_(nullptr),
@@ -494,6 +507,7 @@
       app_image_(false),
       boot_image_(false),
       is_host_(false),
+      image_writer_(nullptr),
       driver_(nullptr),
       dump_stats_(false),
       dump_passes_(false),
@@ -617,6 +631,19 @@
     }
   }
 
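+  // Parses an option such as "--image-format=lz4" into an ImageHeader::StorageMode;
+  // the chosen mode is later handed to the ImageWriter when the image is created.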
+  void ParseImageFormat(const StringPiece& option) {
+    const StringPiece substr("--image-format=");
+    DCHECK(option.starts_with(substr));
+    const StringPiece format_str = option.substr(substr.length());
+    if (format_str == "lz4") {
+      image_storage_mode_ = ImageHeader::kStorageModeLZ4;
+    } else if (format_str == "uncompressed") {
+      image_storage_mode_ = ImageHeader::kStorageModeUncompressed;
+    } else {
+      Usage("Unknown image format: %s", format_str.data());
+    }
+  }
+
   void ProcessOptions(ParserOptions* parser_options) {
     boot_image_ = !image_filename_.empty();
     app_image_ = app_image_fd_ != -1 || !app_image_file_name_.empty();
@@ -663,15 +690,14 @@
       parser_options->boot_image_filename += "/framework/boot.art";
     }
     if (!parser_options->boot_image_filename.empty()) {
-      boot_image_option_ += "-Ximage:";
-      boot_image_option_ += parser_options->boot_image_filename;
+      boot_image_filename_ = parser_options->boot_image_filename;
     }
 
     if (image_classes_filename_ != nullptr && !IsBootImage()) {
       Usage("--image-classes should only be used with --image");
     }
 
-    if (image_classes_filename_ != nullptr && !boot_image_option_.empty()) {
+    if (image_classes_filename_ != nullptr && !boot_image_filename_.empty()) {
       Usage("--image-classes should not be used with --boot-image");
     }
 
@@ -683,7 +709,7 @@
       Usage("--compiled-classes should only be used with --image");
     }
 
-    if (compiled_classes_filename_ != nullptr && !boot_image_option_.empty()) {
+    if (compiled_classes_filename_ != nullptr && !boot_image_filename_.empty()) {
       Usage("--compiled-classes should not be used with --boot-image");
     }
 
@@ -715,7 +741,7 @@
       Usage("--zip-location should be supplied with --zip-fd");
     }
 
-    if (boot_image_option_.empty()) {
+    if (boot_image_filename_.empty()) {
       if (image_base_ == 0) {
         Usage("Non-zero --base not specified");
       }
@@ -873,6 +899,8 @@
         image_classes_filename_ = option.substr(strlen("--image-classes=")).data();
       } else if (option.starts_with("--image-classes-zip=")) {
         image_classes_zip_filename_ = option.substr(strlen("--image-classes-zip=")).data();
+      } else if (option.starts_with("--image-format=")) {
+        ParseImageFormat(option);
       } else if (option.starts_with("--compiled-classes=")) {
         compiled_classes_filename_ = option.substr(strlen("--compiled-classes=")).data();
       } else if (option.starts_with("--compiled-classes-zip=")) {
@@ -1010,20 +1038,10 @@
   // boot class path.
   bool Setup() {
     TimingLogger::ScopedTiming t("dex2oat Setup", timings_);
-    RuntimeOptions runtime_options;
     art::MemMap::Init();  // For ZipEntry::ExtractToMemMap.
-    if (boot_image_option_.empty()) {
-      std::string boot_class_path = "-Xbootclasspath:";
-      boot_class_path += Join(dex_filenames_, ':');
-      runtime_options.push_back(std::make_pair(boot_class_path, nullptr));
-      std::string boot_class_path_locations = "-Xbootclasspath-locations:";
-      boot_class_path_locations += Join(dex_locations_, ':');
-      runtime_options.push_back(std::make_pair(boot_class_path_locations, nullptr));
-    } else {
-      runtime_options.push_back(std::make_pair(boot_image_option_, nullptr));
-    }
-    for (size_t i = 0; i < runtime_args_.size(); i++) {
-      runtime_options.push_back(std::make_pair(runtime_args_[i], nullptr));
+
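+    // Read the image/compiled class and method filter files up front so that
+    // errors in them surface before the comparatively expensive runtime creation.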
+    if (!PrepareImageClasses() || !PrepareCompiledClasses() || !PrepareCompiledMethods()) {
+      return false;
     }
 
     verification_results_.reset(new VerificationResults(compiler_options_.get()));
@@ -1033,23 +1051,15 @@
         IsBootImage() ?
             CompilerCallbacks::CallbackMode::kCompileBootImage :
             CompilerCallbacks::CallbackMode::kCompileApp));
-    runtime_options.push_back(std::make_pair("compilercallbacks", callbacks_.get()));
-    runtime_options.push_back(
-        std::make_pair("imageinstructionset", GetInstructionSetString(instruction_set_)));
 
-    // Only allow no boot image for the runtime if we're compiling one. When we compile an app,
-    // we don't want fallback mode, it will abort as we do not push a boot classpath (it might
-    // have been stripped in preopting, anyways).
-    if (!IsBootImage()) {
-      runtime_options.push_back(std::make_pair("-Xno-dex-file-fallback", nullptr));
+    RuntimeArgumentMap runtime_options;
+    if (!PrepareRuntimeOptions(&runtime_options)) {
+      return false;
     }
-    // Disable libsigchain. We don't don't need it during compilation and it prevents us
-    // from getting a statically linked version of dex2oat (because of dlsym and RTLD_NEXT).
-    runtime_options.push_back(std::make_pair("-Xno-sig-chain", nullptr));
 
     {
       TimingLogger::ScopedTiming t_runtime("Create runtime", timings_);
-      if (!CreateRuntime(runtime_options)) {
+      if (!CreateRuntime(std::move(runtime_options))) {
         return false;
       }
     }
@@ -1064,66 +1074,8 @@
     // Whilst we're in native take the opportunity to initialize well known classes.
     WellKnownClasses::Init(self->GetJniEnv());
 
-    // If --image-classes was specified, calculate the full list of classes to include in the image
-    if (image_classes_filename_ != nullptr) {
-      std::string error_msg;
-      if (image_classes_zip_filename_ != nullptr) {
-        image_classes_.reset(ReadImageClassesFromZip(image_classes_zip_filename_,
-                                                     image_classes_filename_,
-                                                     &error_msg));
-      } else {
-        image_classes_.reset(ReadImageClassesFromFile(image_classes_filename_));
-      }
-      if (image_classes_.get() == nullptr) {
-        LOG(ERROR) << "Failed to create list of image classes from '" << image_classes_filename_ <<
-            "': " << error_msg;
-        return false;
-      }
-    } else if (IsBootImage()) {
-      image_classes_.reset(new std::unordered_set<std::string>);
-    }
-    // If --compiled-classes was specified, calculate the full list of classes to compile in the
-    // image.
-    if (compiled_classes_filename_ != nullptr) {
-      std::string error_msg;
-      if (compiled_classes_zip_filename_ != nullptr) {
-        compiled_classes_.reset(ReadImageClassesFromZip(compiled_classes_zip_filename_,
-                                                        compiled_classes_filename_,
-                                                        &error_msg));
-      } else {
-        compiled_classes_.reset(ReadImageClassesFromFile(compiled_classes_filename_));
-      }
-      if (compiled_classes_.get() == nullptr) {
-        LOG(ERROR) << "Failed to create list of compiled classes from '"
-                   << compiled_classes_filename_ << "': " << error_msg;
-        return false;
-      }
-    } else {
-      compiled_classes_.reset(nullptr);  // By default compile everything.
-    }
-    // If --compiled-methods was specified, read the methods to compile from the given file(s).
-    if (compiled_methods_filename_ != nullptr) {
-      std::string error_msg;
-      if (compiled_methods_zip_filename_ != nullptr) {
-        compiled_methods_.reset(ReadCommentedInputFromZip(compiled_methods_zip_filename_,
-                                                          compiled_methods_filename_,
-                                                          nullptr,            // No post-processing.
-                                                          &error_msg));
-      } else {
-        compiled_methods_.reset(ReadCommentedInputFromFile(compiled_methods_filename_,
-                                                           nullptr));         // No post-processing.
-      }
-      if (compiled_methods_.get() == nullptr) {
-        LOG(ERROR) << "Failed to create list of compiled methods from '"
-            << compiled_methods_filename_ << "': " << error_msg;
-        return false;
-      }
-    } else {
-      compiled_methods_.reset(nullptr);  // By default compile everything.
-    }
-
     ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    if (boot_image_option_.empty()) {
+    if (boot_image_filename_.empty()) {
       dex_files_ = class_linker->GetBootClassPath();
     } else {
       TimingLogger::ScopedTiming t_dex("Opening dex files", timings_);
@@ -1160,22 +1112,7 @@
 
       constexpr bool kSaveDexInput = false;
       if (kSaveDexInput) {
-        for (size_t i = 0; i < dex_files_.size(); ++i) {
-          const DexFile* dex_file = dex_files_[i];
-          std::string tmp_file_name(StringPrintf("/data/local/tmp/dex2oat.%d.%zd.dex",
-                                                 getpid(), i));
-          std::unique_ptr<File> tmp_file(OS::CreateEmptyFile(tmp_file_name.c_str()));
-          if (tmp_file.get() == nullptr) {
-            PLOG(ERROR) << "Failed to open file " << tmp_file_name
-                << ". Try: adb shell chmod 777 /data/local/tmp";
-            continue;
-          }
-          // This is just dumping files for debugging. Ignore errors, and leave remnants.
-          UNUSED(tmp_file->WriteFully(dex_file->Begin(), dex_file->Size()));
-          UNUSED(tmp_file->Flush());
-          UNUSED(tmp_file->Close());
-          LOG(INFO) << "Wrote input to " << tmp_file_name;
-        }
+        SaveDexInput();
       }
     }
     // Ensure opened dex files are writable for dex-to-dex transformations. Also ensure that
@@ -1231,27 +1168,27 @@
 
     // Handle and ClassLoader creation needs to come after Runtime::Create
     jobject class_loader = nullptr;
+    jobject class_path_class_loader = nullptr;
     Thread* self = Thread::Current();
 
-    if (!boot_image_option_.empty()) {
+    if (!boot_image_filename_.empty()) {
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
       OpenClassPathFiles(runtime_->GetClassPathString(), dex_files_, &class_path_files_);
       ScopedObjectAccess soa(self);
 
       // Classpath: first the class-path given.
-      std::vector<const DexFile*> class_path_files;
-      for (auto& class_path_file : class_path_files_) {
-        class_path_files.push_back(class_path_file.get());
-      }
+      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
 
       // Store the classpath we have right now.
       key_value_store_->Put(OatHeader::kClassPathKey,
                             OatFile::EncodeDexFileDependencies(class_path_files));
 
-      // Then the dex files we'll compile. Thus we'll resolve the class-path first.
-      class_path_files.insert(class_path_files.end(), dex_files_.begin(), dex_files_.end());
+      class_path_class_loader = class_linker->CreatePathClassLoader(self,
+                                                                    class_path_files,
+                                                                    nullptr);
 
-      class_loader = class_linker->CreatePathClassLoader(self, class_path_files);
+      // Class path loader as parent so that we'll resolve there first.
+      class_loader = class_linker->CreatePathClassLoader(self, dex_files_, class_path_class_loader);
     }
 
     driver_.reset(new CompilerDriver(compiler_options_.get(),
@@ -1405,8 +1342,31 @@
 
     {
       TimingLogger::ScopedTiming t2("dex2oat Write ELF", timings_);
-      if (!driver_->WriteElf(android_root_, is_host_, dex_files_, oat_writer.get(),
-                             oat_file_.get())) {
+      std::unique_ptr<ElfWriter> elf_writer =
+          CreateElfWriterQuick(instruction_set_, compiler_options_.get(), oat_file_.get());
+
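+      // The ElfWriter is driven through a fixed sequence: Start(), then the
+      // .rodata and .text sections, then the remaining sections, then End().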
+      elf_writer->Start();
+
+      OutputStream* rodata = elf_writer->StartRoData();
+      if (!oat_writer->WriteRodata(rodata)) {
+        LOG(ERROR) << "Failed to write .rodata section to the ELF file " << oat_file_->GetPath();
+        return false;
+      }
+      elf_writer->EndRoData(rodata);
+
+      OutputStream* text = elf_writer->StartText();
+      if (!oat_writer->WriteCode(text)) {
+        LOG(ERROR) << "Failed to write .text section to the ELF file " << oat_file_->GetPath();
+        return false;
+      }
+      elf_writer->EndText(text);
+
+      elf_writer->SetBssSize(oat_writer->GetBssSize());
+      elf_writer->WriteDynamicSection();
+      elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
+      elf_writer->WritePatchLocations(oat_writer->GetAbsolutePatchLocations());
+
+      if (!elf_writer->End()) {
         LOG(ERROR) << "Failed to write ELF file " << oat_file_->GetPath();
         return false;
       }
@@ -1517,6 +1477,16 @@
   }
 
  private:
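+  // Builds a parallel vector of raw, non-owning pointers from a vector of
+  // unique_ptrs; ownership stays with `src`.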
+  template <typename T>
+  static std::vector<T*> MakeNonOwningPointerVector(const std::vector<std::unique_ptr<T>>& src) {
+    std::vector<T*> result;
+    result.reserve(src.size());
+    for (const std::unique_ptr<T>& t : src) {
+      result.push_back(t.get());
+    }
+    return result;
+  }
+
   static size_t OpenDexFiles(const std::vector<const char*>& dex_filenames,
                              const std::vector<const char*>& dex_locations,
                              std::vector<std::unique_ptr<const DexFile>>* dex_files) {
@@ -1577,10 +1547,138 @@
     }
   }
 
+  bool PrepareImageClasses() {
+    // If --image-classes was specified, calculate the full list of classes to include in the image.
+    if (image_classes_filename_ != nullptr) {
+      image_classes_ =
+          ReadClasses(image_classes_zip_filename_, image_classes_filename_, "image");
+      if (image_classes_ == nullptr) {
+        return false;
+      }
+    } else if (IsBootImage()) {
+      image_classes_.reset(new std::unordered_set<std::string>);
+    }
+    return true;
+  }
+
+  bool PrepareCompiledClasses() {
+    // If --compiled-classes was specified, calculate the full list of classes to compile in the
+    // image.
+    if (compiled_classes_filename_ != nullptr) {
+      compiled_classes_ =
+          ReadClasses(compiled_classes_zip_filename_, compiled_classes_filename_, "compiled");
+      if (compiled_classes_ == nullptr) {
+        return false;
+      }
+    } else {
+      compiled_classes_.reset(nullptr);  // By default compile everything.
+    }
+    return true;
+  }
+
+  static std::unique_ptr<std::unordered_set<std::string>> ReadClasses(const char* zip_filename,
+                                                                      const char* classes_filename,
+                                                                      const char* tag) {
+    std::unique_ptr<std::unordered_set<std::string>> classes;
+    std::string error_msg;
+    if (zip_filename != nullptr) {
+      classes.reset(ReadImageClassesFromZip(zip_filename, classes_filename, &error_msg));
+    } else {
+      classes.reset(ReadImageClassesFromFile(classes_filename));
+    }
+    if (classes == nullptr) {
+      LOG(ERROR) << "Failed to create list of " << tag << " classes from '"
+                 << classes_filename << "': " << error_msg;
+    }
+    return classes;
+  }
+
+  bool PrepareCompiledMethods() {
+    // If --compiled-methods was specified, read the methods to compile from the given file(s).
+    if (compiled_methods_filename_ != nullptr) {
+      std::string error_msg;
+      if (compiled_methods_zip_filename_ != nullptr) {
+        compiled_methods_.reset(ReadCommentedInputFromZip(compiled_methods_zip_filename_,
+                                                          compiled_methods_filename_,
+                                                          nullptr,            // No post-processing.
+                                                          &error_msg));
+      } else {
+        compiled_methods_.reset(ReadCommentedInputFromFile(compiled_methods_filename_,
+                                                           nullptr));         // No post-processing.
+      }
+      if (compiled_methods_.get() == nullptr) {
+        LOG(ERROR) << "Failed to create list of compiled methods from '"
+            << compiled_methods_filename_ << "': " << error_msg;
+        return false;
+      }
+    } else {
+      compiled_methods_.reset(nullptr);  // By default compile everything.
+    }
+    return true;
+  }
+
+  void SaveDexInput() {
+    for (size_t i = 0; i < dex_files_.size(); ++i) {
+      const DexFile* dex_file = dex_files_[i];
+      std::string tmp_file_name(StringPrintf("/data/local/tmp/dex2oat.%d.%zd.dex",
+                                             getpid(), i));
+      std::unique_ptr<File> tmp_file(OS::CreateEmptyFile(tmp_file_name.c_str()));
+      if (tmp_file.get() == nullptr) {
+        PLOG(ERROR) << "Failed to open file " << tmp_file_name
+            << ". Try: adb shell chmod 777 /data/local/tmp";
+        continue;
+      }
+      // This is just dumping files for debugging. Ignore errors, and leave remnants.
+      UNUSED(tmp_file->WriteFully(dex_file->Begin(), dex_file->Size()));
+      UNUSED(tmp_file->Flush());
+      UNUSED(tmp_file->Close());
+      LOG(INFO) << "Wrote input to " << tmp_file_name;
+    }
+  }
+
+  bool PrepareRuntimeOptions(RuntimeArgumentMap* runtime_options) {
+    RuntimeOptions raw_options;
+    if (boot_image_filename_.empty()) {
+      std::string boot_class_path = "-Xbootclasspath:";
+      boot_class_path += Join(dex_filenames_, ':');
+      raw_options.push_back(std::make_pair(boot_class_path, nullptr));
+      std::string boot_class_path_locations = "-Xbootclasspath-locations:";
+      boot_class_path_locations += Join(dex_locations_, ':');
+      raw_options.push_back(std::make_pair(boot_class_path_locations, nullptr));
+    } else {
+      std::string boot_image_option = "-Ximage:";
+      boot_image_option += boot_image_filename_;
+      raw_options.push_back(std::make_pair(boot_image_option, nullptr));
+    }
+    for (size_t i = 0; i < runtime_args_.size(); i++) {
+      raw_options.push_back(std::make_pair(runtime_args_[i], nullptr));
+    }
+
+    raw_options.push_back(std::make_pair("compilercallbacks", callbacks_.get()));
+    raw_options.push_back(
+        std::make_pair("imageinstructionset", GetInstructionSetString(instruction_set_)));
+
+    // Only allow no boot image for the runtime if we're compiling one. When we compile an app,
+    // we don't want fallback mode; it would abort, as we do not push a boot classpath (it might
+    // have been stripped in preopting anyway).
+    if (!IsBootImage()) {
+      raw_options.push_back(std::make_pair("-Xno-dex-file-fallback", nullptr));
+    }
+    // Disable libsigchain. We don't need it during compilation and it prevents us
+    // from getting a statically linked version of dex2oat (because of dlsym and RTLD_NEXT).
+    raw_options.push_back(std::make_pair("-Xno-sig-chain", nullptr));
+
+    if (!Runtime::ParseOptions(raw_options, false, runtime_options)) {
+      LOG(ERROR) << "Failed to parse runtime options";
+      return false;
+    }
+    return true;
+  }
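
As an illustration of the options assembled above (file names hypothetical),
with an empty boot_image_filename_ and dex_filenames_ = {"a.dex", "b.dex"},
the raw option list comes out as:

    -Xbootclasspath:a.dex:b.dex
    -Xbootclasspath-locations:<Join(dex_locations_, ':')>
    -Xno-dex-file-fallback    (only when not compiling a boot image)
    -Xno-sig-chain

together with the compilercallbacks and imageinstructionset pairs.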
+
   // Create a runtime necessary for compilation.
-  bool CreateRuntime(const RuntimeOptions& runtime_options)
+  bool CreateRuntime(RuntimeArgumentMap&& runtime_options)
       SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_) {
-    if (!Runtime::Create(runtime_options, false)) {
+    if (!Runtime::Create(std::move(runtime_options))) {
       LOG(ERROR) << "Failed to create runtime";
       return false;
     }
@@ -1608,7 +1706,8 @@
     image_writer_.reset(new ImageWriter(*driver_,
                                         image_base,
                                         compiler_options_->GetCompilePic(),
-                                        IsAppImage()));
+                                        IsAppImage(),
+                                        image_storage_mode_));
   }
 
   // Let the ImageWriter write the image file. If we do not compile PIC, also fix up the oat file.
@@ -1777,12 +1876,13 @@
   std::vector<const char*> dex_locations_;
   int zip_fd_;
   std::string zip_location_;
-  std::string boot_image_option_;
+  std::string boot_image_filename_;
   std::vector<const char*> runtime_args_;
   std::string image_filename_;
   uintptr_t image_base_;
   const char* image_classes_zip_filename_;
   const char* image_classes_filename_;
+  ImageHeader::StorageMode image_storage_mode_;
   const char* compiled_classes_zip_filename_;
   const char* compiled_classes_filename_;
   const char* compiled_methods_zip_filename_;
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 52e6c02..1a2f2c2 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -27,7 +27,6 @@
  * Differences between XML output and the "current.xml" file:
  * - classes in same package are not all grouped together; nothing is sorted
  * - no "deprecated" on fields and methods
- * - no "value" on fields
  * - no parameter names
  * - no generic signatures on parameters, e.g. type="java.lang.Class&lt;?&gt;"
  * - class shows declared fields and methods; does not show inherited fields
@@ -472,18 +471,19 @@
 /*
  * Callback for dumping each positions table entry.
  */
-static bool dumpPositionsCb(void* /*context*/, u4 address, u4 lineNum) {
-  fprintf(gOutFile, "        0x%04x line=%d\n", address, lineNum);
+static bool dumpPositionsCb(void* /*context*/, const DexFile::PositionInfo& entry) {
+  fprintf(gOutFile, "        0x%04x line=%d\n", entry.address_, entry.line_);
   return false;
 }
 
 /*
  * Callback for dumping locals table entry.
  */
-static void dumpLocalsCb(void* /*context*/, u2 slot, u4 startAddress, u4 endAddress,
-                         const char* name, const char* descriptor, const char* signature) {
+static void dumpLocalsCb(void* /*context*/, const DexFile::LocalInfo& entry) {
+  const char* signature = entry.signature_ != nullptr ? entry.signature_ : "";
   fprintf(gOutFile, "        0x%04x - 0x%04x reg=%d %s %s %s\n",
-          startAddress, endAddress, slot, name, descriptor, signature);
+          entry.start_address_, entry.end_address_, entry.reg_,
+          entry.name_, entry.descriptor_, signature);
 }
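
The entry structs consumed by the two callbacks above are not part of this
change; a sketch inferred from the fields they read (the field types are
assumptions, the real definitions live in DexFile):

    #include <cstdint>

    struct PositionInfo {
      uint32_t address_;  // Dex bytecode address.
      uint32_t line_;     // Source line number.
    };

    struct LocalInfo {
      const char* name_;        // Local variable name.
      const char* descriptor_;  // Type descriptor.
      const char* signature_;   // Generic signature; may be nullptr.
      uint32_t start_address_;  // First address where the local is live.
      uint32_t end_address_;    // First address where it is no longer live.
      uint16_t reg_;            // Register (slot) holding the local.
    };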
 
 /*
@@ -901,11 +901,9 @@
   // Positions and locals table in the debug info.
   bool is_static = (flags & kAccStatic) != 0;
   fprintf(gOutFile, "      positions     : \n");
-  pDexFile->DecodeDebugInfo(
-      pCode, is_static, idx, dumpPositionsCb, nullptr, nullptr);
+  pDexFile->DecodeDebugPositionInfo(pCode, dumpPositionsCb, nullptr);
   fprintf(gOutFile, "      locals        : \n");
-  pDexFile->DecodeDebugInfo(
-      pCode, is_static, idx, nullptr, dumpLocalsCb, nullptr);
+  pDexFile->DecodeDebugLocalInfo(pCode, is_static, idx, dumpLocalsCb, nullptr);
 }
 
 /*
@@ -1019,9 +1017,126 @@
 }
 
 /*
+ * Dumps a string value with some escape characters.
+ */
+static void dumpEscapedString(const char* p) {
+  for (; *p; p++) {
+    switch (*p) {
+      case '\\':
+        fputs("\\\\", gOutFile);
+        break;
+      case '\"':
+        fputs("\\\"", gOutFile);
+        break;
+      case '\t':
+        fputs("\\t", gOutFile);
+        break;
+      case '\n':
+        fputs("\\n", gOutFile);
+        break;
+      case '\r':
+        fputs("\\r", gOutFile);
+        break;
+      default:
+        putc(*p, gOutFile);
+    }
+  }
+}
+
+/*
+ * Dumps an XML attribute value between double-quotes.
+ */
+static void dumpXmlAttribute(const char* p) {
+  for (; *p; p++) {
+    switch (*p) {
+      case '&':
+        fputs("&amp;", gOutFile);
+        break;
+      case '<':
+        fputs("&lt;", gOutFile);
+        break;
+      case '"':
+        fputs("&quot;", gOutFile);
+        break;
+      case '\t':
+        fputs("&#x9;", gOutFile);
+        break;
+      case '\n':
+        fputs("&#xA;", gOutFile);
+        break;
+      case '\r':
+        fputs("&#xD;", gOutFile);
+        break;
+      default:
+        putc(*p, gOutFile);
+    }
+  }
+}
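
A quick worked example of the two escapers above, for the input
say "hi"<TAB>&exit:

    dumpEscapedString prints:  say \"hi\"\t&exit
    dumpXmlAttribute prints:   say &quot;hi&quot;&#x9;&amp;exit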
+
+/*
+ * Dumps the value of a static (class) field.
+ */
+static void dumpSFieldValue(const DexFile* pDexFile,
+                            EncodedStaticFieldValueIterator::ValueType valueType,
+                            const jvalue* pValue) {
+  switch (valueType) {
+    case EncodedStaticFieldValueIterator::kByte:
+      fprintf(gOutFile, "%" PRIu8, pValue->b);
+      break;
+    case EncodedStaticFieldValueIterator::kShort:
+      fprintf(gOutFile, "%" PRId16, pValue->s);
+      break;
+    case EncodedStaticFieldValueIterator::kChar:
+      fprintf(gOutFile, "%" PRIu16, pValue->c);
+      break;
+    case EncodedStaticFieldValueIterator::kInt:
+      fprintf(gOutFile, "%" PRId32, pValue->i);
+      break;
+    case EncodedStaticFieldValueIterator::kLong:
+      fprintf(gOutFile, "%" PRId64, pValue->j);
+      break;
+    case EncodedStaticFieldValueIterator::kFloat:
+      fprintf(gOutFile, "%f", pValue->f);
+      break;
+    case EncodedStaticFieldValueIterator::kDouble:
+      fprintf(gOutFile, "%f", pValue->d);
+      break;
+    case EncodedStaticFieldValueIterator::kString: {
+      const char* str =
+          pDexFile->GetStringData(pDexFile->GetStringId(pValue->i));
+      if (gOptions.outputFormat == OUTPUT_PLAIN) {
+        fputs("\"", gOutFile);
+        dumpEscapedString(str);
+        fputs("\"", gOutFile);
+      } else {
+        dumpXmlAttribute(str);
+      }
+      break;
+    }
+    case EncodedStaticFieldValueIterator::kNull:
+      fputs("null", gOutFile);
+      break;
+    case EncodedStaticFieldValueIterator::kBoolean:
+      fputs(pValue->z ? "true" : "false", gOutFile);
+      break;
+
+    case EncodedStaticFieldValueIterator::kAnnotation:
+    case EncodedStaticFieldValueIterator::kArray:
+    case EncodedStaticFieldValueIterator::kEnum:
+    case EncodedStaticFieldValueIterator::kField:
+    case EncodedStaticFieldValueIterator::kMethod:
+    case EncodedStaticFieldValueIterator::kType:
+    default:
+      fprintf(gOutFile, "Unexpected static field type: %d", valueType);
+  }
+}
+
+/*
  * Dumps a static (class) field.
  */
-static void dumpSField(const DexFile* pDexFile, u4 idx, u4 flags, int i) {
+static void dumpSField(const DexFile* pDexFile, u4 idx, u4 flags, int i,
+                       EncodedStaticFieldValueIterator::ValueType valueType,
+                       const jvalue* pValue) {
   // Bail for anything private if export only requested.
   if (gOptions.exportsOnly && (flags & (kAccPublic | kAccProtected)) == 0) {
     return;
@@ -1038,6 +1153,11 @@
     fprintf(gOutFile, "      name          : '%s'\n", name);
     fprintf(gOutFile, "      type          : '%s'\n", typeDescriptor);
     fprintf(gOutFile, "      access        : 0x%04x (%s)\n", flags, accessStr);
+    if (pValue != nullptr) {
+      fputs("      value         : ", gOutFile);
+      dumpSFieldValue(pDexFile, valueType, pValue);
+      fputs("\n", gOutFile);
+    }
   } else if (gOptions.outputFormat == OUTPUT_XML) {
     fprintf(gOutFile, "<field name=\"%s\"\n", name);
     char *tmp = descriptorToDot(typeDescriptor);
@@ -1050,7 +1170,12 @@
     fprintf(gOutFile, " final=%s\n", quotedBool((flags & kAccFinal) != 0));
     // The "deprecated=" is not knowable w/o parsing annotations.
     fprintf(gOutFile, " visibility=%s\n", quotedVisibility(flags));
-    fprintf(gOutFile, ">\n</field>\n");
+    if (pValue != nullptr) {
+      fputs(" value=\"", gOutFile);
+      dumpSFieldValue(pDexFile, valueType, pValue);
+      fputs("\"\n", gOutFile);
+    }
+    fputs(">\n</field>\n", gOutFile);
   }
 
   free(accessStr);
@@ -1060,7 +1185,8 @@
  * Dumps an instance field.
  */
 static void dumpIField(const DexFile* pDexFile, u4 idx, u4 flags, int i) {
-  dumpSField(pDexFile, idx, flags, i);
+  dumpSField(pDexFile, idx, flags, i,
+             EncodedStaticFieldValueIterator::kByte, nullptr);
 }
 
 /*
@@ -1192,6 +1318,8 @@
       fprintf(gOutFile, " extends=\"%s\"\n", tmp);
       free(tmp);
     }
+    fprintf(gOutFile, " interface=%s\n",
+            quotedBool((pClassDef.access_flags_ & kAccInterface) != 0));
     fprintf(gOutFile, " abstract=%s\n", quotedBool((pClassDef.access_flags_ & kAccAbstract) != 0));
     fprintf(gOutFile, " static=%s\n", quotedBool((pClassDef.access_flags_ & kAccStatic) != 0));
     fprintf(gOutFile, " final=%s\n", quotedBool((pClassDef.access_flags_ & kAccFinal) != 0));
@@ -1222,10 +1350,23 @@
     if (gOptions.outputFormat == OUTPUT_PLAIN) {
       fprintf(gOutFile, "  Static fields     -\n");
     }
+    EncodedStaticFieldValueIterator staticFieldValues(*pDexFile, pClassDef);
     for (int i = 0; pClassData.HasNextStaticField(); i++, pClassData.Next()) {
+      EncodedStaticFieldValueIterator::ValueType valueType =
+          EncodedStaticFieldValueIterator::kByte;
+      const jvalue* pValue = nullptr;
+      if (staticFieldValues.HasNext()) {
+        valueType = staticFieldValues.GetValueType();
+        pValue = &staticFieldValues.GetJavaValue();
+      }
       dumpSField(pDexFile, pClassData.GetMemberIndex(),
-                           pClassData.GetRawMemberAccessFlags(), i);
+                           pClassData.GetRawMemberAccessFlags(), i,
+                 valueType, pValue);
+      if (staticFieldValues.HasNext()) {
+        staticFieldValues.Next();
+      }
     }  // for
+    DCHECK(!staticFieldValues.HasNext());
     if (gOptions.outputFormat == OUTPUT_PLAIN) {
       fprintf(gOutFile, "  Instance fields   -\n");
     }
diff --git a/dexdump/dexdump_test.cc b/dexdump/dexdump_test.cc
index 4230cb2..9819233 100644
--- a/dexdump/dexdump_test.cc
+++ b/dexdump/dexdump_test.cc
@@ -37,7 +37,7 @@
   virtual void SetUp() {
     CommonRuntimeTest::SetUp();
     // Dogfood our own lib core dex file.
-    dex_file_ = GetLibCoreDexFileName();
+    dex_file_ = GetLibCoreDexFileNames()[0];
   }
 
   // Runs test with given arguments.
diff --git a/dexlist/dexlist.cc b/dexlist/dexlist.cc
index 1d0f75e..d20c169 100644
--- a/dexlist/dexlist.cc
+++ b/dexlist/dexlist.cc
@@ -80,10 +80,10 @@
  * first line in the method, which *should* correspond to the first
  * entry from the table.  (Could also use "min" here.)
  */
-static bool positionsCb(void* context, u4 /*address*/, u4 lineNum) {
+static bool positionsCb(void* context, const DexFile::PositionInfo& entry) {
   int* pFirstLine = reinterpret_cast<int *>(context);
   if (*pFirstLine == -1) {
-    *pFirstLine = lineNum;
+    *pFirstLine = entry.line_;
   }
   return 0;
 }
@@ -92,7 +92,7 @@
  * Dumps a method.
  */
 static void dumpMethod(const DexFile* pDexFile,
-                       const char* fileName, u4 idx, u4 flags,
+                       const char* fileName, u4 idx, u4 flags ATTRIBUTE_UNUSED,
                        const DexFile::CodeItem* pCode, u4 codeOffset) {
   // Abstract and native methods don't get listed.
   if (pCode == nullptr || codeOffset == 0) {
@@ -121,9 +121,7 @@
 
   // Find the first line.
   int firstLine = -1;
-  bool is_static = (flags & kAccStatic) != 0;
-  pDexFile->DecodeDebugInfo(
-     pCode, is_static, idx, positionsCb, nullptr, &firstLine);
+  pDexFile->DecodeDebugPositionInfo(pCode, positionsCb, &firstLine);
 
   // Method signature.
   const Signature signature = pDexFile->GetMethodSignature(pMethodId);
diff --git a/dexlist/dexlist_test.cc b/dexlist/dexlist_test.cc
index 82179de..9a65ba6 100644
--- a/dexlist/dexlist_test.cc
+++ b/dexlist/dexlist_test.cc
@@ -37,7 +37,7 @@
   virtual void SetUp() {
     CommonRuntimeTest::SetUp();
     // Dogfood our own lib core dex file.
-    dex_file_ = GetLibCoreDexFileName();
+    dex_file_ = GetLibCoreDexFileNames()[0];
   }
 
   // Runs test with given arguments.
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index c2f23aa..cd64a4f 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -111,6 +111,8 @@
   { kRTypeMask | (0x1f << 21), 63, "dsra32", "DTA", },
 
   // SPECIAL0
+  { kSpecial0Mask | 0x307ff, 1, "movf", "DSc" },
+  { kSpecial0Mask | 0x307ff, 0x10001, "movt", "DSc" },
   { kSpecial0Mask | 0x7ff, (2 << 6) | 24, "mul", "DST" },
   { kSpecial0Mask | 0x7ff, (3 << 6) | 24, "muh", "DST" },
   { kSpecial0Mask | 0x7ff, (2 << 6) | 25, "mulu", "DST" },
@@ -139,6 +141,7 @@
   // SPECIAL2
   { kSpecial2Mask | 0x7ff, (28 << kOpcodeShift) | 2, "mul", "DST" },
   { kSpecial2Mask | 0x7ff, (28 << kOpcodeShift) | 32, "clz", "DS" },
+  { kSpecial2Mask | 0x7ff, (28 << kOpcodeShift) | 33, "clo", "DS" },
   { kSpecial2Mask | 0xffff, (28 << kOpcodeShift) | 0, "madd", "ST" },
   { kSpecial2Mask | 0xffff, (28 << kOpcodeShift) | 1, "maddu", "ST" },
   { kSpecial2Mask | 0xffff, (28 << kOpcodeShift) | 2, "mul", "DST" },
@@ -148,13 +151,34 @@
 
   // SPECIAL3
   { kSpecial3Mask | 0x3f, (31 << kOpcodeShift) | 3, "dext", "TSAZ", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | (16 << 6) | 32, "seb", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | (24 << 6) | 32, "seh", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | 32, "bitswap", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | 36, "dbitswap", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | (2 << 6) | 36, "dsbh", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | (5 << 6) | 36, "dshd", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | (2 << 6) | 32, "wsbh", "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | (16 << 6) | 32,
+    "seb",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | (24 << 6) | 32,
+    "seh",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | 32,
+    "bitswap",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | 36,
+    "dbitswap",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | (2 << 6) | 36,
+    "dsbh",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | (5 << 6) | 36,
+    "dshd",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | (2 << 6) | 32,
+    "wsbh",
+    "DT", },
   { kSpecial3Mask | 0x7f, (31 << kOpcodeShift) | 0x26, "sc", "Tl", },
   { kSpecial3Mask | 0x7f, (31 << kOpcodeShift) | 0x27, "scd", "Tl", },
   { kSpecial3Mask | 0x7f, (31 << kOpcodeShift) | 0x36, "ll", "Tl", },
@@ -194,6 +218,11 @@
   { kITypeMask | (0x1f << 21), 15 << kOpcodeShift, "lui", "TI", },
   { kITypeMask, 15 << kOpcodeShift, "aui", "TSI", },
 
+  { kITypeMask | (0x3e3 << 16), (17 << kOpcodeShift) | (8 << 21), "bc1f", "cB" },
+  { kITypeMask | (0x3e3 << 16), (17 << kOpcodeShift) | (8 << 21) | (1 << 16), "bc1t", "cB" },
+  { kITypeMask | (0x1f << 21), (17 << kOpcodeShift) | (9 << 21), "bc1eqz", "tB" },
+  { kITypeMask | (0x1f << 21), (17 << kOpcodeShift) | (13 << 21), "bc1nez", "tB" },
+
   { kITypeMask | (0x1f << 21), 22 << kOpcodeShift, "blezc", "TB" },
 
   // TODO: de-dup
@@ -311,6 +340,26 @@
   { kFpMask | (0x1f << 21), kCop1 | (0x04 << 21), "mtc1", "Td" },
   { kFpMask | (0x1f << 21), kCop1 | (0x05 << 21), "dmtc1", "Td" },
   { kFpMask | (0x1f << 21), kCop1 | (0x07 << 21), "mthc1", "Td" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 1, "cmp.un.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 2, "cmp.eq.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 3, "cmp.ueq.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 4, "cmp.lt.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 5, "cmp.ult.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 6, "cmp.le.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 7, "cmp.ule.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 17, "cmp.or.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 18, "cmp.une.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 19, "cmp.ne.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 1, "cmp.un.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 2, "cmp.eq.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 3, "cmp.ueq.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 4, "cmp.lt.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 5, "cmp.ult.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 6, "cmp.le.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 7, "cmp.ule.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 17, "cmp.or.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 18, "cmp.une.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 19, "cmp.ne.d", "adt" },
   { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 0, "add", "fadt" },
   { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 1, "sub", "fadt" },
   { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 2, "mul", "fadt" },
@@ -334,6 +383,13 @@
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 36, "cvt.w", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 37, "cvt.l", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 38, "cvt.ps", "fad" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 49, "c.un", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 50, "c.eq", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 51, "c.ueq", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 52, "c.olt", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 53, "c.ult", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 54, "c.ole", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 55, "c.ule", "fCdt" },
   { kFpMask, kCop1 | 0x10, "sel", "fadt" },
   { kFpMask, kCop1 | 0x1e, "max", "fadt" },
   { kFpMask, kCop1 | 0x1c, "min", "fadt" },
@@ -386,6 +442,12 @@
                    << StringPrintf("  ; %+d", offset);
             }
             break;
+          case 'C':  // Floating-point condition code flag in c.<cond>.fmt.
+            args << "cc" << (sa >> 2);
+            break;
+          case 'c':  // Floating-point condition code flag in bc1f/bc1t and movf/movt.
+            args << "cc" << (rt >> 2);
+            break;
           case 'D': args << 'r' << rd; break;
           case 'd': args << 'f' << rd; break;
           case 'a': args << 'f' << sa; break;
diff --git a/imgdiag/Android.mk b/imgdiag/Android.mk
index d5d7c22..83315be 100644
--- a/imgdiag/Android.mk
+++ b/imgdiag/Android.mk
@@ -25,4 +25,8 @@
 # that the image it's analyzing be the same ISA as the runtime ISA.
 
 # Build variants {target,host} x {debug,ndebug} x {32,64}
-$(eval $(call build-art-multi-executable,imgdiag,$(IMGDIAG_SRC_FILES),libart-compiler libbacktrace,libcutils,libziparchive-host,art/compiler,both))
+#
+# Honor HOST_PREFER_32_BIT, as building a 64-bit imgdiag executable
+# when HOST_PREFER_32_BIT is true would require an unmet dependency on
+# 64-bit libbacktrace.
+$(eval $(call build-art-multi-executable,imgdiag,$(IMGDIAG_SRC_FILES),libart-compiler libbacktrace,libcutils,libziparchive-host,art/compiler,both,$(HOST_PREFER_32_BIT)))
diff --git a/imgdiag/imgdiag_test.cc b/imgdiag/imgdiag_test.cc
index 0d6a8c9..a926ca5 100644
--- a/imgdiag/imgdiag_test.cc
+++ b/imgdiag/imgdiag_test.cc
@@ -36,6 +36,11 @@
 static const char* kImgDiagBootImage = "--boot-image";
 static const char* kImgDiagBinaryName = "imgdiag";
 
+// From kernel <include/linux/threads.h>.
+#define PID_MAX_LIMIT (4*1024*1024)  // Upper bound; most kernel configs have a smaller max PID.
+
+static const pid_t kImgDiagGuaranteedBadPid = (PID_MAX_LIMIT + 1);
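
Since PID_MAX_LIMIT is 4*1024*1024 = 4194304, kImgDiagGuaranteedBadPid is
4194305, which no kernel built against that header can assign to a process;
the negative test below therefore no longer relies on a magic value.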
+
 class ImgDiagTest : public CommonRuntimeTest {
  protected:
   virtual void SetUp() {
@@ -132,7 +137,8 @@
 
   // Run imgdiag --image-diff-pid=some_bad_pid and wait until it's done with a 0 exit code.
   std::string error_msg;
-  ASSERT_FALSE(ExecDefaultBootImage(-12345, &error_msg)) << "Incorrectly executed";
+  ASSERT_FALSE(ExecDefaultBootImage(kImgDiagGuaranteedBadPid,
+                                    &error_msg)) << "Incorrectly executed";
   UNUSED(error_msg);
 }
 
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index cd83de6..bad928e 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -28,6 +28,7 @@
 #include "arch/instruction_set_features.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
@@ -41,6 +42,8 @@
 #include "gc/space/space-inl.h"
 #include "image.h"
 #include "indenter.h"
+#include "linker/buffered_output_stream.h"
+#include "linker/file_output_stream.h"
 #include "mapping_table.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
@@ -51,7 +54,6 @@
 #include "oat_file-inl.h"
 #include "oat_file_manager.h"
 #include "os.h"
-#include "output_stream.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
 #include "stack_map.h"
@@ -116,7 +118,7 @@
 
     File* elf_file = OS::CreateEmptyFile(output_name_.c_str());
     std::unique_ptr<BufferedOutputStream> output_stream(
-        new BufferedOutputStream(new FileOutputStream(elf_file)));
+        MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file)));
     builder_.reset(new ElfBuilder<ElfTypes32>(isa, output_stream.get()));
 
     builder_->Start();
@@ -162,7 +164,7 @@
 
     builder_->End();
 
-    return builder_->Good() && output_stream->Flush();
+    return builder_->Good();
   }
 
   void Walk(Callback callback) {
@@ -1662,6 +1664,8 @@
         ImageHeader::kSectionDexCacheArrays);
     const auto& intern_section = image_header_.GetImageSection(
         ImageHeader::kSectionInternedStrings);
+    const auto& class_table_section = image_header_.GetImageSection(
+        ImageHeader::kSectionClassTable);
     stats_.header_bytes = header_bytes;
     stats_.alignment_bytes += RoundUp(header_bytes, kObjectAlignment) - header_bytes;
     // Add padding between the field and method section.
@@ -1678,6 +1682,7 @@
     stats_.art_method_bytes += method_section.Size();
     stats_.dex_cache_arrays_bytes += dex_cache_arrays_section.Size();
     stats_.interned_strings_bytes += intern_section.Size();
+    stats_.class_table_bytes += class_table_section.Size();
     stats_.Dump(os, indent_os);
     os << "\n";
 
@@ -2068,6 +2073,7 @@
     size_t art_method_bytes;
     size_t dex_cache_arrays_bytes;
     size_t interned_strings_bytes;
+    size_t class_table_bytes;
     size_t bitmap_bytes;
     size_t alignment_bytes;
 
@@ -2099,6 +2105,7 @@
           art_method_bytes(0),
           dex_cache_arrays_bytes(0),
           interned_strings_bytes(0),
+          class_table_bytes(0),
           bitmap_bytes(0),
           alignment_bytes(0),
           managed_code_bytes(0),
@@ -2261,6 +2268,7 @@
                                   "art_method_bytes       =  %8zd (%2.0f%% of art file bytes)\n"
                                   "dex_cache_arrays_bytes =  %8zd (%2.0f%% of art file bytes)\n"
                                   "interned_string_bytes  =  %8zd (%2.0f%% of art file bytes)\n"
+                                  "class_table_bytes      =  %8zd (%2.0f%% of art file bytes)\n"
                                   "bitmap_bytes           =  %8zd (%2.0f%% of art file bytes)\n"
                                   "alignment_bytes        =  %8zd (%2.0f%% of art file bytes)\n\n",
                                   header_bytes, PercentOfFileBytes(header_bytes),
@@ -2271,11 +2279,14 @@
                                   PercentOfFileBytes(dex_cache_arrays_bytes),
                                   interned_strings_bytes,
                                   PercentOfFileBytes(interned_strings_bytes),
+                                  class_table_bytes, PercentOfFileBytes(class_table_bytes),
                                   bitmap_bytes, PercentOfFileBytes(bitmap_bytes),
                                   alignment_bytes, PercentOfFileBytes(alignment_bytes))
             << std::flush;
-        CHECK_EQ(file_bytes, header_bytes + object_bytes + art_field_bytes + art_method_bytes +
-                 dex_cache_arrays_bytes + interned_strings_bytes + bitmap_bytes + alignment_bytes);
+        CHECK_EQ(file_bytes,
+                 header_bytes + object_bytes + art_field_bytes + art_method_bytes +
+                 dex_cache_arrays_bytes + interned_strings_bytes + class_table_bytes +
+                 bitmap_bytes + alignment_bytes);
       }
 
       os << "object_bytes breakdown:\n";
@@ -2369,7 +2380,7 @@
 static int DumpImage(Runtime* runtime, const char* image_location, OatDumperOptions* options,
                      std::ostream* os) {
   // Dumping the image, no explicit class loader.
-  NullHandle<mirror::ClassLoader> null_class_loader;
+  ScopedNullHandle<mirror::ClassLoader> null_class_loader;
   options->class_loader_ = &null_class_loader;
 
   ScopedObjectAccess soa(Thread::Current());
@@ -2412,7 +2423,7 @@
 
   // Need a class loader.
   // Fake that we're a compiler.
-  jobject class_loader = class_linker->CreatePathClassLoader(self, class_path);
+  jobject class_loader = class_linker->CreatePathClassLoader(self, class_path, /*parent*/nullptr);
 
   // Use the class loader while dumping.
   StackHandleScope<1> scope(self);
@@ -2428,7 +2439,7 @@
 static int DumpOatWithoutRuntime(OatFile* oat_file, OatDumperOptions* options, std::ostream* os) {
   CHECK(oat_file != nullptr && options != nullptr);
   // No image = no class loader.
-  NullHandle<mirror::ClassLoader> null_class_loader;
+  ScopedNullHandle<mirror::ClassLoader> null_class_loader;
   options->class_loader_ = &null_class_loader;
 
   OatDumper oat_dumper(*oat_file, *options);
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 3d9f7dc..46ab34b 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -153,6 +153,12 @@
     return false;
   }
 
+  if (image_header.GetStorageMode() != ImageHeader::kStorageModeUncompressed) {
+    LOG(ERROR) << "Patchoat is not supported with compressed image files "
+               << input_image->GetPath();
+    return false;
+  }
+
   /*bool is_image_pic = */IsImagePic(image_header, input_image->GetPath());
   // Nothing special to do right now since the image always needs to get patched.
   // Perhaps in some far-off future we may have images with relative addresses that are true-PIC.
@@ -526,6 +532,20 @@
   temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots);
 }
 
+void PatchOat::PatchClassTable(const ImageHeader* image_header) {
+  const auto& section = image_header->GetImageSection(ImageHeader::kSectionClassTable);
+  // Note that we require that ReadFromMemory does not make an internal copy of the elements.
+  // This also relies on VisitRoots not doing any verification, which could fail after we update
+  // the roots to the image addresses.
+  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+  ClassTable temp_table;
+  temp_table.ReadFromMemory(image_->Begin() + section.Offset());
+  FixupRootVisitor visitor(this);
+  BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&visitor, RootInfo(kRootUnknown));
+  temp_table.VisitRoots(buffered_visitor);
+}
+
 class RelocatedPointerVisitor {
  public:
   explicit RelocatedPointerVisitor(PatchOat* patch_oat) : patch_oat_(patch_oat) {}
@@ -606,6 +626,7 @@
   PatchArtFields(image_header);
   PatchArtMethods(image_header);
   PatchInternedStrings(image_header);
+  PatchClassTable(image_header);
   // Patch dex file int/long arrays which point to ArtFields.
   PatchDexFileArrays(img_roots);
 
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index 0915014..38bd865 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -116,6 +116,8 @@
   void PatchArtMethods(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchInternedStrings(const ImageHeader* image_header)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  void PatchClassTable(const ImageHeader* image_header)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchDexFileArrays(mirror::ObjectArray<mirror::Object>* img_roots)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 0b0f094..6264578 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -27,6 +27,7 @@
   base/arena_allocator.cc \
   base/arena_bit_vector.cc \
   base/bit_vector.cc \
+  base/file_magic.cc \
   base/hex_dump.cc \
   base/logging.cc \
   base/mutex.cc \
@@ -41,6 +42,7 @@
   check_jni.cc \
   class_linker.cc \
   class_table.cc \
+  code_simulator_container.cc \
   common_throws.cc \
   debugger.cc \
   dex_file.cc \
@@ -100,9 +102,11 @@
   jdwp/jdwp_socket.cc \
   jdwp/object_registry.cc \
   jni_env_ext.cc \
+  jit/debugger_interface.cc \
   jit/jit.cc \
   jit/jit_code_cache.cc \
   jit/jit_instrumentation.cc \
+  jit/offline_profiling_info.cc \
   jit/profiling_info.cc \
   lambda/art_lambda_method.cc \
   lambda/box_table.cc \
@@ -227,6 +231,16 @@
 LIBART_TARGET_LDFLAGS :=
 LIBART_HOST_LDFLAGS :=
 
+# Keep the __jit_debug_register_code symbol as a unique symbol during ICF for architectures where
+# we use gold as the linker (arm, x86, x86_64). The symbol is used by debuggers to detect when
+# new JIT code is generated. We don't want it to be called when a different function with the
+# same (empty) body is called.
+JIT_DEBUG_REGISTER_CODE_LDFLAGS := -Wl,--keep-unique,__jit_debug_register_code
+LIBART_TARGET_LDFLAGS_arm    := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
+LIBART_TARGET_LDFLAGS_x86    := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
+LIBART_TARGET_LDFLAGS_x86_64 := $(JIT_DEBUG_REGISTER_CODE_LDFLAGS)
+JIT_DEBUG_REGISTER_CODE_LDFLAGS :=
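
For context on why this must survive identical-code folding: debuggers that
implement the GDB JIT interface set a breakpoint on this deliberately empty
function, so a minimal sketch of the hook looks like the following (not the
exact ART definition):

    // The body is empty on purpose; the debugger breakpoints on the symbol
    // and re-reads the JIT descriptor whenever it is hit. It must therefore
    // not be ICF-merged with some other empty function.
    extern "C" void __attribute__((noinline)) __jit_debug_register_code() {
    }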
+
 LIBART_TARGET_SRC_FILES := \
   $(LIBART_COMMON_SRC_FILES) \
   jdwp/jdwp_adb.cc \
@@ -349,6 +363,8 @@
   thread_state.h \
   verifier/method_verifier.h
 
+LIBOPENJDKJVM_SRC_FILES := openjdkjvm/OpenjdkJvm.cc
+
 LIBART_CFLAGS := -DBUILDING_LIBART=1
 
 LIBART_TARGET_CFLAGS :=
@@ -385,8 +401,9 @@
 
 # $(1): target or host
 # $(2): ndebug or debug
-# $(3): static or shared (empty means shared, applies only for host)
-define build-libart
+# $(3): static or shared (note that static only applies for host)
+# $(4): module name : either libart or libopenjdkjvm
+define build-runtime-library
   ifneq ($(1),target)
     ifneq ($(1),host)
       $$(error expected target or host for argument 1, received $(1))
@@ -397,6 +414,11 @@
       $$(error expected ndebug or debug for argument 2, received $(2))
     endif
   endif
+  ifneq ($(4),libart)
+    ifneq ($(4),libopenjdkjvm)
+      $$(error expected libart or libopenjdkjvm for argument 4, received $(4))
+    endif
+  endif
 
   art_target_or_host := $(1)
   art_ndebug_or_debug := $(2)
@@ -405,12 +427,12 @@
   include $$(CLEAR_VARS)
   LOCAL_CPP_EXTENSION := $$(ART_CPP_EXTENSION)
   ifeq ($$(art_ndebug_or_debug),ndebug)
-    LOCAL_MODULE := libart
+    LOCAL_MODULE := $(4)
     ifeq ($$(art_target_or_host),target)
       LOCAL_FDO_SUPPORT := true
     endif
   else # debug
-    LOCAL_MODULE := libartd
+    LOCAL_MODULE := $(4)d
   endif
 
   LOCAL_MODULE_TAGS := optional
@@ -421,17 +443,25 @@
     LOCAL_MODULE_CLASS := SHARED_LIBRARIES
   endif
 
-  ifeq ($$(art_target_or_host),target)
-    LOCAL_SRC_FILES := $$(LIBART_TARGET_SRC_FILES)
-    $$(foreach arch,$$(ART_TARGET_SUPPORTED_ARCH), \
-      $$(eval LOCAL_SRC_FILES_$$(arch) := $$$$(LIBART_TARGET_SRC_FILES_$$(arch))))
-  else # host
-    LOCAL_SRC_FILES := $$(LIBART_HOST_SRC_FILES)
-    LOCAL_SRC_FILES_32 := $$(LIBART_HOST_SRC_FILES_32)
-    LOCAL_SRC_FILES_64 := $$(LIBART_HOST_SRC_FILES_64)
-    LOCAL_IS_HOST_MODULE := true
+  ifeq ($(4),libart)
+    ifeq ($$(art_target_or_host),target)
+      LOCAL_SRC_FILES := $$(LIBART_TARGET_SRC_FILES)
+      $$(foreach arch,$$(ART_TARGET_SUPPORTED_ARCH), \
+        $$(eval LOCAL_SRC_FILES_$$(arch) := $$$$(LIBART_TARGET_SRC_FILES_$$(arch))))
+    else # host
+      LOCAL_SRC_FILES := $$(LIBART_HOST_SRC_FILES)
+      LOCAL_SRC_FILES_32 := $$(LIBART_HOST_SRC_FILES_32)
+      LOCAL_SRC_FILES_64 := $$(LIBART_HOST_SRC_FILES_64)
+      LOCAL_IS_HOST_MODULE := true
+    endif
+  else # libopenjdkjvm
+    LOCAL_SRC_FILES := $$(LIBOPENJDKJVM_SRC_FILES)
+    ifeq ($$(art_target_or_host),host)
+      LOCAL_IS_HOST_MODULE := true
+    endif
   endif
 
+ifeq ($(4),libart)
   GENERATED_SRC_DIR := $$(call local-generated-sources-dir)
   ENUM_OPERATOR_OUT_CC_FILES := $$(patsubst %.h,%_operator_out.cc,$$(LIBART_ENUM_OPERATOR_OUT_HEADER_FILES))
   ENUM_OPERATOR_OUT_GEN := $$(addprefix $$(GENERATED_SRC_DIR)/,$$(ENUM_OPERATOR_OUT_CC_FILES))
@@ -442,12 +472,15 @@
 	$$(transform-generated-source)
 
   LOCAL_GENERATED_SOURCES += $$(ENUM_OPERATOR_OUT_GEN)
+endif
 
   LOCAL_CFLAGS := $$(LIBART_CFLAGS)
   LOCAL_LDFLAGS := $$(LIBART_LDFLAGS)
   ifeq ($$(art_target_or_host),target)
     LOCAL_CFLAGS += $$(LIBART_TARGET_CFLAGS)
     LOCAL_LDFLAGS += $$(LIBART_TARGET_LDFLAGS)
+    $$(foreach arch,$$(ART_TARGET_SUPPORTED_ARCH), \
+      $$(eval LOCAL_LDFLAGS_$$(arch) := $$(LIBART_TARGET_LDFLAGS_$$(arch))))
   else #host
     LOCAL_CFLAGS += $$(LIBART_HOST_CFLAGS)
     LOCAL_LDFLAGS += $$(LIBART_HOST_LDFLAGS)
@@ -455,8 +488,6 @@
       LOCAL_LDFLAGS += -static
     endif
   endif
-  $$(foreach arch,$$(ART_TARGET_SUPPORTED_ARCH), \
-    $$(eval LOCAL_LDFLAGS_$$(arch) := $$(LIBART_TARGET_LDFLAGS_$$(arch))))
 
   # Clang usage
   ifeq ($$(art_target_or_host),target)
@@ -489,9 +520,19 @@
   LOCAL_C_INCLUDES += art
 
   ifeq ($$(art_static_or_shared),static)
-    LOCAL_STATIC_LIBRARIES := libnativehelper libnativebridge libsigchain_dummy libbacktrace
+    LOCAL_STATIC_LIBRARIES := libnativehelper
+    LOCAL_STATIC_LIBRARIES += libnativebridge
+    LOCAL_STATIC_LIBRARIES += libnativeloader
+    LOCAL_STATIC_LIBRARIES += libsigchain_dummy
+    LOCAL_STATIC_LIBRARIES += libbacktrace
+    LOCAL_STATIC_LIBRARIES += liblz4
   else
-    LOCAL_SHARED_LIBRARIES := libnativehelper libnativebridge libsigchain libbacktrace
+    LOCAL_SHARED_LIBRARIES := libnativehelper
+    LOCAL_SHARED_LIBRARIES += libnativebridge
+    LOCAL_SHARED_LIBRARIES += libnativeloader
+    LOCAL_SHARED_LIBRARIES += libsigchain
+    LOCAL_SHARED_LIBRARIES += libbacktrace
+    LOCAL_SHARED_LIBRARIES += liblz4
   endif
 
   ifeq ($$(art_target_or_host),target)
@@ -513,6 +554,15 @@
       LOCAL_SHARED_LIBRARIES += libcutils
     endif
   endif
+
+  ifeq ($(4),libopenjdkjvm)
+    ifeq ($$(art_ndebug_or_debug),ndebug)
+      LOCAL_SHARED_LIBRARIES += libart
+    else
+      LOCAL_SHARED_LIBRARIES += libartd
+    endif
+    LOCAL_NOTICE_FILE := $(LOCAL_PATH)/openjdkjvm/NOTICE
+  endif
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $$(LOCAL_PATH)/Android.mk
 
@@ -549,24 +599,30 @@
 # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since
 # they are used to cross compile for the target.
 ifeq ($(ART_BUILD_HOST_NDEBUG),true)
-  $(eval $(call build-libart,host,ndebug))
+  $(eval $(call build-runtime-library,host,ndebug,shared,libart))
+  $(eval $(call build-runtime-library,host,ndebug,shared,libopenjdkjvm))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
-    $(eval $(call build-libart,host,ndebug,static))
+    $(eval $(call build-runtime-library,host,ndebug,static,libart))
+    $(eval $(call build-runtime-library,host,ndebug,static,libopenjdkjvm))
   endif
 endif
 ifeq ($(ART_BUILD_HOST_DEBUG),true)
-  $(eval $(call build-libart,host,debug))
+  $(eval $(call build-runtime-library,host,debug,shared,libart))
+  $(eval $(call build-runtime-library,host,debug,shared,libopenjdkjvm))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
-    $(eval $(call build-libart,host,debug,static))
+    $(eval $(call build-runtime-library,host,debug,static,libart))
+    $(eval $(call build-runtime-library,host,debug,static,libopenjdkjvm))
   endif
 endif
 
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
-#  $(error $(call build-libart,target,ndebug))
-  $(eval $(call build-libart,target,ndebug))
+#  $(error $(call build-runtime-library,target,ndebug))
+  $(eval $(call build-runtime-library,target,ndebug,shared,libart))
+  $(eval $(call build-runtime-library,target,ndebug,shared,libopenjdkjvm))
 endif
 ifeq ($(ART_BUILD_TARGET_DEBUG),true)
-  $(eval $(call build-libart,target,debug))
+  $(eval $(call build-runtime-library,target,debug,shared,libart))
+  $(eval $(call build-runtime-library,target,debug,shared,libopenjdkjvm))
 endif
 
 # Clear locally defined variables.
@@ -575,8 +631,14 @@
 LIBART_HOST_DEFAULT_INSTRUCTION_SET_FEATURES :=
 LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES :=
 2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES :=
-LIBART_TARGET_LDFLAGS :=
 LIBART_HOST_LDFLAGS :=
+LIBART_TARGET_LDFLAGS :=
+LIBART_TARGET_LDFLAGS_arm :=
+LIBART_TARGET_LDFLAGS_arm64 :=
+LIBART_TARGET_LDFLAGS_x86 :=
+LIBART_TARGET_LDFLAGS_x86_64 :=
+LIBART_TARGET_LDFLAGS_mips :=
+LIBART_TARGET_LDFLAGS_mips64 :=
 LIBART_TARGET_SRC_FILES :=
 LIBART_TARGET_SRC_FILES_arm :=
 LIBART_TARGET_SRC_FILES_arm64 :=
@@ -591,4 +653,4 @@
 LIBART_CFLAGS :=
 LIBART_TARGET_CFLAGS :=
 LIBART_HOST_CFLAGS :=
-build-libart :=
+build-runtime-library :=
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index d6ba304..1680bbd 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -30,6 +30,13 @@
     options->push_back(std::make_pair("imageinstructionset", "x86_64"));
   }
 
+  // Do not do any of the finalization. We don't want to run any code, and we don't need the
+  // heap prepared; it would actually be a problem, given that we set the instruction set to
+  // x86_64 in SetUpRuntimeOptions.
+  void FinalizeSetup() OVERRIDE {
+    ASSERT_EQ(InstructionSet::kX86_64, Runtime::Current()->GetInstructionSet());
+  }
+
   static void CheckFrameSize(InstructionSet isa, Runtime::CalleeSaveType type, uint32_t save_size)
       NO_THREAD_SAFETY_ANALYSIS {
     Runtime* const runtime = Runtime::Current();
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index be33b0e..7141181 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -166,6 +166,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMark = artReadBarrierMark;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 }
diff --git a/runtime/arch/arm/instruction_set_features_arm.cc b/runtime/arch/arm/instruction_set_features_arm.cc
index 28d1942..51f992b 100644
--- a/runtime/arch/arm/instruction_set_features_arm.cc
+++ b/runtime/arch/arm/instruction_set_features_arm.cc
@@ -42,15 +42,15 @@
   // Look for variants that have divide support.
   static const char* arm_variants_with_div[] = {
           "cortex-a7", "cortex-a12", "cortex-a15", "cortex-a17", "cortex-a53", "cortex-a57",
-          "cortex-m3", "cortex-m4", "cortex-r4", "cortex-r5",
-          "cyclone", "denver", "krait", "swift"};
+          "cortex-a53.a57", "cortex-m3", "cortex-m4", "cortex-r4", "cortex-r5",
+          "cyclone", "denver", "krait", "swift" };
 
   bool has_div = FindVariantInArray(arm_variants_with_div, arraysize(arm_variants_with_div),
                                     variant);
 
   // Look for variants that have LPAE support.
   static const char* arm_variants_with_lpae[] = {
-      "cortex-a7", "cortex-a15", "krait", "denver"
+      "cortex-a7", "cortex-a15", "krait", "denver", "cortex-a53", "cortex-a57", "cortex-a53.a57"
   };
   bool has_lpae = FindVariantInArray(arm_variants_with_lpae, arraysize(arm_variants_with_lpae),
                                      variant);
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 63285a4..5c8ff8f 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -149,6 +149,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMark = artReadBarrierMark;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
diff --git a/runtime/arch/mips/entrypoints_direct_mips.h b/runtime/arch/mips/entrypoints_direct_mips.h
index 74e7638..0d01ad5 100644
--- a/runtime/arch/mips/entrypoints_direct_mips.h
+++ b/runtime/arch/mips/entrypoints_direct_mips.h
@@ -45,6 +45,7 @@
       entrypoint == kQuickCmpgFloat ||
       entrypoint == kQuickCmplDouble ||
       entrypoint == kQuickCmplFloat ||
+      entrypoint == kQuickReadBarrierMark ||
       entrypoint == kQuickReadBarrierSlow ||
       entrypoint == kQuickReadBarrierForRootSlow;
 }
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index cba427d..51eb77f 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -274,6 +274,8 @@
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierJni), "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMark = artReadBarrierMark;
+  static_assert(IsDirectEntrypoint(kQuickReadBarrierMark), "Direct C stub not marked direct.");
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   static_assert(IsDirectEntrypoint(kQuickReadBarrierSlow), "Direct C stub not marked direct.");
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index 89f54dd..4bdb38e 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -180,6 +180,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMark = artReadBarrierMark;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index afa48cd..2cb2212 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -431,9 +431,9 @@
           [referrer] "r"(referrer), [hidden] "r"(hidden)
         : "at", "v0", "v1", "t0", "t1", "t2", "t3", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
           "t8", "t9", "k0", "k1", "fp", "ra",
-          "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13",
-          "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26",
-          "f27", "f28", "f29", "f30", "f31",
+          "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", "$f10", "$f11",
+          "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22",
+          "$f23", "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31",
           "memory");  // clobber.
 #elif defined(__x86_64__) && !defined(__APPLE__)
 #define PUSH(reg) "pushq " # reg "\n\t .cfi_adjust_cfa_offset 8\n\t"
@@ -1193,7 +1193,8 @@
 
 
 TEST_F(StubTest, StringCompareTo) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || \
+    (defined(__mips__) && defined(__LP64__)) || (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
 
   Thread* self = Thread::Current();
@@ -2042,7 +2043,7 @@
 }
 
 TEST_F(StubTest, StringIndexOf) {
-#if defined(__arm__) || defined(__aarch64__)
+#if defined(__arm__) || defined(__aarch64__) || (defined(__mips__) && defined(__LP64__))
   Thread* self = Thread::Current();
   ScopedObjectAccess soa(self);
   // garbage is created during ClassLinker::Init
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index e200018..e593f39 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -28,6 +28,7 @@
                                             const mirror::Class* ref_class);
 
 // Read barrier entrypoints.
+extern "C" mirror::Object* art_quick_read_barrier_mark(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
@@ -93,6 +94,25 @@
   qpoints->pLockObject = art_quick_lock_object;
   qpoints->pUnlockObject = art_quick_unlock_object;
 
+  // More math.
+  qpoints->pCos = cos;
+  qpoints->pSin = sin;
+  qpoints->pAcos = acos;
+  qpoints->pAsin = asin;
+  qpoints->pAtan = atan;
+  qpoints->pAtan2 = atan2;
+  qpoints->pCbrt = cbrt;
+  qpoints->pCosh = cosh;
+  qpoints->pExp = exp;
+  qpoints->pExpm1 = expm1;
+  qpoints->pHypot = hypot;
+  qpoints->pLog = log;
+  qpoints->pLog10 = log10;
+  qpoints->pNextAfter = nextafter;
+  qpoints->pSinh = sinh;
+  qpoints->pTan = tan;
+  qpoints->pTanh = tanh;
+
   // Math
   qpoints->pD2l = art_quick_d2l;
   qpoints->pF2l = art_quick_f2l;
@@ -139,6 +159,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMark = art_quick_read_barrier_mark;
   qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
   qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow;
 };
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index ef39999..42f5df4 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -45,6 +45,11 @@
     "silvermont",
 };
 
+static constexpr const char* x86_variants_prefer_locked_add_sync[] = {
+    "atom",
+    "silvermont",
+};
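
The preference recorded here selects how a full memory fence is emitted; a
hedged sketch of the two alternatives (the instruction choice follows from
the feature name, the actual code generation lives in the compiler backends):

    // Sketch: on the variants above, a locked add-to-memory is assumed to be
    // a cheaper full barrier than mfence; both order all prior loads/stores.
    inline void FullFence(bool prefers_locked_add) {
      if (prefers_locked_add) {
        int dummy = 0;
        __asm__ __volatile__("lock addl $0, %0" : "+m"(dummy) : : "memory", "cc");
      } else {
        __asm__ __volatile__("mfence" : : : "memory");
      }
    }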
+
 const X86InstructionSetFeatures* X86InstructionSetFeatures::FromVariant(
     const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED,
     bool x86_64) {
@@ -60,6 +65,10 @@
   bool has_AVX = false;
   bool has_AVX2 = false;
 
+  bool prefers_locked_add = FindVariantInArray(x86_variants_prefer_locked_add_sync,
+                                               arraysize(x86_variants_prefer_locked_add_sync),
+                                               variant);
+
   bool known_variant = FindVariantInArray(x86_known_variants, arraysize(x86_known_variants),
                                           variant);
   if (!known_variant && variant != "default") {
@@ -68,10 +77,10 @@
 
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                            has_AVX2, prefers_locked_add);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                            has_AVX2, prefers_locked_add);
   }
 }
 
@@ -83,11 +92,13 @@
   bool has_SSE4_2 = (bitmap & kSse4_2Bitfield) != 0;
   bool has_AVX = (bitmap & kAvxBitfield) != 0;
   bool has_AVX2 = (bitmap & kAvxBitfield) != 0;
+  bool prefers_locked_add = (bitmap & kPrefersLockedAdd) != 0;
   if (x86_64) {
-    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2);
+    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
+                                                has_AVX, has_AVX2, prefers_locked_add);
   } else {
-    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
+                                             has_AVX, has_AVX2, prefers_locked_add);
   }
 }
 
@@ -124,11 +135,15 @@
   const bool has_AVX2 = true;
 #endif
 
+  // No #define for memory synchronization preference.
+  const bool prefers_locked_add = false;
+
   if (x86_64) {
-    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2);
+    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                                has_AVX2, prefers_locked_add);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                            has_AVX2, prefers_locked_add);
   }
 }
 
@@ -141,6 +156,8 @@
   bool has_SSE4_2 = false;
   bool has_AVX = false;
   bool has_AVX2 = false;
+  // No cpuinfo for memory synchronization preference.
+  const bool prefers_locked_add = false;
 
   std::ifstream in("/proc/cpuinfo");
   if (!in.fail()) {
@@ -177,10 +194,11 @@
     LOG(ERROR) << "Failed to open /proc/cpuinfo";
   }
   if (x86_64) {
-    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2);
+    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                                has_AVX2, prefers_locked_add);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                            has_AVX2, prefers_locked_add);
   }
 }
 
@@ -204,7 +222,8 @@
       (has_SSE4_1_ == other_as_x86->has_SSE4_1_) &&
       (has_SSE4_2_ == other_as_x86->has_SSE4_2_) &&
       (has_AVX_ == other_as_x86->has_AVX_) &&
-      (has_AVX2_ == other_as_x86->has_AVX2_);
+      (has_AVX2_ == other_as_x86->has_AVX2_) &&
+      (prefers_locked_add_ == other_as_x86->prefers_locked_add_);
 }
 
 uint32_t X86InstructionSetFeatures::AsBitmap() const {
@@ -213,7 +232,8 @@
       (has_SSE4_1_ ? kSse4_1Bitfield : 0) |
       (has_SSE4_2_ ? kSse4_2Bitfield : 0) |
       (has_AVX_ ? kAvxBitfield : 0) |
-      (has_AVX2_ ? kAvx2Bitfield : 0);
+      (has_AVX2_ ? kAvx2Bitfield : 0) |
+      (prefers_locked_add_ ? kPrefersLockedAdd : 0);
 }
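
For reference, the updated test expectations below follow directly from this
bitmap: with smp = 1, ssse3 = 2, sse4.1 = 4, sse4.2 = 8, and lock_add = 64,
an ssse3 + lock_add variant yields 1 + 2 + 64 = 67, and adding sse4.1/sse4.2
yields 1 + 2 + 4 + 8 + 64 = 79.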
 
 std::string X86InstructionSetFeatures::GetFeatureString() const {
@@ -248,6 +268,11 @@
   } else {
     result += ",-avx2";
   }
+  if (prefers_locked_add_) {
+    result += ",lock_add";
+  } else {
+    result += ",-lock_add";
+  }
   return result;
 }
 
@@ -259,6 +284,7 @@
   bool has_SSE4_2 = has_SSE4_2_;
   bool has_AVX = has_AVX_;
   bool has_AVX2 = has_AVX2_;
+  bool prefers_locked_add = prefers_locked_add_;
   for (auto i = features.begin(); i != features.end(); i++) {
     std::string feature = Trim(*i);
     if (feature == "ssse3") {
@@ -281,6 +307,10 @@
       has_AVX2 = true;
     } else if (feature == "-avx2") {
       has_AVX2 = false;
+    } else if (feature == "lock_add") {
+      prefers_locked_add = true;
+    } else if (feature == "-lock_add") {
+      prefers_locked_add = false;
     } else {
       *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
       return nullptr;
@@ -288,10 +318,10 @@
   }
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                                has_AVX2, prefers_locked_add);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                            has_AVX2, prefers_locked_add);
   }
 }
 
diff --git a/runtime/arch/x86/instruction_set_features_x86.h b/runtime/arch/x86/instruction_set_features_x86.h
index 7b61245..2b845f8 100644
--- a/runtime/arch/x86/instruction_set_features_x86.h
+++ b/runtime/arch/x86/instruction_set_features_x86.h
@@ -60,6 +60,8 @@
 
   bool HasSSE4_1() const { return has_SSE4_1_; }
 
+  bool PrefersLockedAddSynchronization() const { return prefers_locked_add_; }
+
  protected:
   // Parse a string of the form "ssse3" adding these to a new InstructionSetFeatures.
   virtual const InstructionSetFeatures*
@@ -73,9 +75,10 @@
                                  bool x86_64, std::string* error_msg) const;
 
   X86InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                            bool has_AVX, bool has_AVX2)
+                            bool has_AVX, bool has_AVX2, bool prefers_locked_add)
       : InstructionSetFeatures(smp), has_SSSE3_(has_SSSE3), has_SSE4_1_(has_SSE4_1),
-        has_SSE4_2_(has_SSE4_2), has_AVX_(has_AVX), has_AVX2_(has_AVX2) {
+        has_SSE4_2_(has_SSE4_2), has_AVX_(has_AVX), has_AVX2_(has_AVX2),
+        prefers_locked_add_(prefers_locked_add) {
   }
 
  private:
@@ -87,6 +90,7 @@
     kSse4_2Bitfield = 8,
     kAvxBitfield = 16,
     kAvx2Bitfield = 32,
+    kPrefersLockedAdd = 64,
   };
 
   const bool has_SSSE3_;   // x86 128bit SIMD - Supplemental SSE.
@@ -94,6 +98,7 @@
   const bool has_SSE4_2_;  // x86 128bit SIMD SSE4.2.
   const bool has_AVX_;     // x86 256bit SIMD AVX.
   const bool has_AVX2_;    // x86 256bit SIMD AVX 2.0.
+  const bool prefers_locked_add_;  // Whether x86 prefers a locked add for memory synchronization.
 
   DISALLOW_COPY_AND_ASSIGN(X86InstructionSetFeatures);
 };
diff --git a/runtime/arch/x86/instruction_set_features_x86_test.cc b/runtime/arch/x86/instruction_set_features_x86_test.cc
index 25a406b..e8d01e6 100644
--- a/runtime/arch/x86/instruction_set_features_x86_test.cc
+++ b/runtime/arch/x86/instruction_set_features_x86_test.cc
@@ -27,7 +27,8 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2", x86_features->GetFeatureString().c_str());
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+               x86_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_features->AsBitmap(), 1U);
 }
 
@@ -39,8 +40,9 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2", x86_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_features->AsBitmap(), 3U);
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
+               x86_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_features->AsBitmap(), 67U);
 
   // Build features for a 32-bit x86 default processor.
   std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -48,7 +50,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -58,9 +60,9 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
                x86_64_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_64_features->AsBitmap(), 3U);
+  EXPECT_EQ(x86_64_features->AsBitmap(), 67U);
 
   EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
   EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
@@ -75,8 +77,9 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2", x86_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_features->AsBitmap(), 15U);
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
+               x86_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_features->AsBitmap(), 79U);
 
   // Build features for a 32-bit x86 default processor.
   std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -84,7 +87,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -94,9 +97,9 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
                x86_64_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_64_features->AsBitmap(), 15U);
+  EXPECT_EQ(x86_64_features->AsBitmap(), 79U);
 
   EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
   EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
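
The updated AsBitmap() expectations follow directly from the bitfield enum in instruction_set_features_x86.h; a quick self-contained check, assuming the smp bit has value 1 (as the default-feature bitmap of 1U implies):

    #include <cassert>

    int main() {
      // Bit values from the kXxxBitfield enum; kSmp assumed to be 1.
      const unsigned kSmp = 1u, kSsse3 = 2u, kSse4_1 = 4u, kSse4_2 = 8u, kLockAdd = 64u;
      assert((kSmp | kSsse3 | kLockAdd) == 67u);
      assert((kSmp | kSsse3 | kSse4_1 | kSse4_2 | kLockAdd) == 79u);
      return 0;
    }
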
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 463c9cf..da30331 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1686,6 +1686,14 @@
     UNREACHABLE
 END_FUNCTION art_nested_signal_return
 
+DEFINE_FUNCTION art_quick_read_barrier_mark
+    PUSH eax                         // pass arg1 - obj
+    call SYMBOL(artReadBarrierMark)  // artReadBarrierMark(obj)
+    addl LITERAL(4), %esp            // pop argument
+    CFI_ADJUST_CFA_OFFSET(-4)
+    ret
+END_FUNCTION art_quick_read_barrier_mark
+
 DEFINE_FUNCTION art_quick_read_barrier_slow
     PUSH edx                         // pass arg3 - offset
     PUSH ecx                         // pass arg2 - obj
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 2b38c9d..0a5d14a 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -29,6 +29,7 @@
                                                    const mirror::Class* ref_class);
 
 // Read barrier entrypoints.
+extern "C" mirror::Object* art_quick_read_barrier_mark(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
@@ -98,6 +99,25 @@
   qpoints->pLockObject = art_quick_lock_object;
   qpoints->pUnlockObject = art_quick_unlock_object;
 
+  // More math.
+  qpoints->pCos = cos;
+  qpoints->pSin = sin;
+  qpoints->pAcos = acos;
+  qpoints->pAsin = asin;
+  qpoints->pAtan = atan;
+  qpoints->pAtan2 = atan2;
+  qpoints->pCbrt = cbrt;
+  qpoints->pCosh = cosh;
+  qpoints->pExp = exp;
+  qpoints->pExpm1 = expm1;
+  qpoints->pHypot = hypot;
+  qpoints->pLog = log;
+  qpoints->pLog10 = log10;
+  qpoints->pNextAfter = nextafter;
+  qpoints->pSinh = sinh;
+  qpoints->pTan = tan;
+  qpoints->pTanh = tanh;
+
   // Math
   qpoints->pD2l = art_d2l;
   qpoints->pF2l = art_f2l;
@@ -143,6 +163,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMark = art_quick_read_barrier_mark;
   qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
   qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow;
 #endif  // __APPLE__
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64.h b/runtime/arch/x86_64/instruction_set_features_x86_64.h
index 3280177..b8000d0 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64.h
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64.h
@@ -74,8 +74,9 @@
 
  private:
   X86_64InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                               bool has_AVX, bool has_AVX2)
-      : X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2) {
+                               bool has_AVX, bool has_AVX2, bool prefers_locked_add)
+      : X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                  has_AVX2, prefers_locked_add) {
   }
 
   friend class X86InstructionSetFeatures;
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
index 5171080..4562c64 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
@@ -27,7 +27,7 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
                x86_64_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_64_features->AsBitmap(), 1U);
 }
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 17d277e..883da96 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1712,6 +1712,17 @@
     UNREACHABLE
 END_FUNCTION art_nested_signal_return
 
+DEFINE_FUNCTION art_quick_read_barrier_mark
+    SETUP_FP_CALLEE_SAVE_FRAME
+    subq LITERAL(8), %rsp           // Alignment padding.
+    CFI_ADJUST_CFA_OFFSET(8)
+    call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
+    addq LITERAL(8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-8)
+    RESTORE_FP_CALLEE_SAVE_FRAME
+    ret
+END_FUNCTION art_quick_read_barrier_mark
+
 DEFINE_FUNCTION art_quick_read_barrier_slow
     SETUP_FP_CALLEE_SAVE_FRAME
     subq LITERAL(8), %rsp           // Alignment padding.
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index f7ed812..effa1c5 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -106,17 +106,16 @@
   return num_registers;
 }
 
-static bool HasSameNameAndSignature(ArtMethod* method1, ArtMethod* method2)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+bool ArtMethod::HasSameNameAndSignature(ArtMethod* other) {
   ScopedAssertNoThreadSuspension ants(Thread::Current(), "HasSameNameAndSignature");
-  const DexFile* dex_file = method1->GetDexFile();
-  const DexFile::MethodId& mid = dex_file->GetMethodId(method1->GetDexMethodIndex());
-  if (method1->GetDexCache() == method2->GetDexCache()) {
-    const DexFile::MethodId& mid2 = dex_file->GetMethodId(method2->GetDexMethodIndex());
+  const DexFile* dex_file = GetDexFile();
+  const DexFile::MethodId& mid = dex_file->GetMethodId(GetDexMethodIndex());
+  if (GetDexCache() == other->GetDexCache()) {
+    const DexFile::MethodId& mid2 = dex_file->GetMethodId(other->GetDexMethodIndex());
     return mid.name_idx_ == mid2.name_idx_ && mid.proto_idx_ == mid2.proto_idx_;
   }
-  const DexFile* dex_file2 = method2->GetDexFile();
-  const DexFile::MethodId& mid2 = dex_file2->GetMethodId(method2->GetDexMethodIndex());
+  const DexFile* dex_file2 = other->GetDexFile();
+  const DexFile::MethodId& mid2 = dex_file2->GetMethodId(other->GetDexMethodIndex());
   if (!DexFileStringEquals(dex_file, mid.name_idx_, dex_file2, mid2.name_idx_)) {
     return false;  // Name mismatch.
   }
@@ -147,19 +146,18 @@
       mirror::IfTable* iftable = GetDeclaringClass()->GetIfTable();
       for (size_t i = 0; i < iftable->Count() && result == nullptr; i++) {
         mirror::Class* interface = iftable->GetInterface(i);
-        for (size_t j = 0; j < interface->NumVirtualMethods(); ++j) {
-          ArtMethod* interface_method = interface->GetVirtualMethod(j, pointer_size);
-          if (HasSameNameAndSignature(
-              this, interface_method->GetInterfaceMethodIfProxy(sizeof(void*)))) {
-            result = interface_method;
+        for (ArtMethod& interface_method : interface->GetVirtualMethods(pointer_size)) {
+          if (HasSameNameAndSignature(interface_method.GetInterfaceMethodIfProxy(pointer_size))) {
+            result = &interface_method;
             break;
           }
         }
       }
     }
   }
-  DCHECK(result == nullptr || HasSameNameAndSignature(
-      GetInterfaceMethodIfProxy(sizeof(void*)), result->GetInterfaceMethodIfProxy(sizeof(void*))));
+  DCHECK(result == nullptr ||
+         GetInterfaceMethodIfProxy(pointer_size)->HasSameNameAndSignature(
+             result->GetInterfaceMethodIfProxy(pointer_size)));
   return result;
 }
 
@@ -299,7 +297,9 @@
         ShadowFrame* shadow_frame =
             self->PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
         mirror::Throwable* pending_exception = nullptr;
-        self->PopDeoptimizationContext(result, &pending_exception);
+        bool from_code = false;
+        self->PopDeoptimizationContext(result, &pending_exception, &from_code);
+        CHECK(!from_code);
         self->SetTopOfStack(nullptr);
         self->SetTopOfShadowStack(shadow_frame);
 
@@ -308,7 +308,7 @@
         if (pending_exception != nullptr) {
           self->SetException(pending_exception);
         }
-        interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, result);
+        interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, from_code, result);
       }
       if (kLogInvocationStartAndReturn) {
         LOG(INFO) << StringPrintf("Returned '%s' quick code=%p", PrettyMethod(this).c_str(),
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 5a2d6c3..8efad88 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -263,6 +263,9 @@
   mirror::Class* GetClassFromTypeIndex(uint16_t type_idx, bool resolve, size_t ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Returns true if this method has the same name and signature as the other method.
+  bool HasSameNameAndSignature(ArtMethod* other) SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Find the method that this method overrides.
   ArtMethod* FindOverriddenMethod(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index b548dfb..2b4826e 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -122,7 +122,7 @@
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.thread_local_pos.
-#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 151 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 169 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
             art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_end.
@@ -160,16 +160,16 @@
 ADD_TEST_EQ(size_t(MIRROR_OBJECT_HEADER_SIZE), sizeof(art::mirror::Object))
 
 // Offsets within java.lang.Class.
-#define MIRROR_CLASS_COMPONENT_TYPE_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_COMPONENT_TYPE_OFFSET (8 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_COMPONENT_TYPE_OFFSET,
             art::mirror::Class::ComponentTypeOffset().Int32Value())
-#define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (72 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (36 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_ACCESS_FLAGS_OFFSET,
             art::mirror::Class::AccessFlagsOffset().Int32Value())
-#define MIRROR_CLASS_OBJECT_SIZE_OFFSET (104 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_OBJECT_SIZE_OFFSET (100 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_OBJECT_SIZE_OFFSET,
             art::mirror::Class::ObjectSizeOffset().Int32Value())
-#define MIRROR_CLASS_STATUS_OFFSET (116 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_STATUS_OFFSET (112 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_STATUS_OFFSET,
             art::mirror::Class::StatusOffset().Int32Value())
 
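
The THREAD_LOCAL_POS_OFFSET bump is consistent with the entrypoint changes in this patch: seventeen "more math" entrypoints (pCos through pTanh) plus pReadBarrierMark add eighteen pointer-sized slots to tlsPtr_ ahead of thread_local_pos. A one-line check, assuming no other tlsPtr_ fields moved:

    // 151 pointer slots previously preceded thread_local_pos; this change
    // inserts 17 math entrypoints plus pReadBarrierMark before it.
    static_assert(151 + 17 + 1 == 169, "THREAD_LOCAL_POS_OFFSET bump");

    int main() { return 0; }
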
diff --git a/runtime/base/array_slice.h b/runtime/base/array_slice.h
new file mode 100644
index 0000000..19ad302
--- /dev/null
+++ b/runtime/base/array_slice.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_ARRAY_SLICE_H_
+#define ART_RUNTIME_BASE_ARRAY_SLICE_H_
+
+#include "length_prefixed_array.h"
+#include "stride_iterator.h"
+#include "base/bit_utils.h"
+#include "base/casts.h"
+#include "base/iteration_range.h"
+
+namespace art {
+
+// An ArraySlice is an abstraction over an array or a part of an array of a particular type. It does
+// bounds checking and can be made from several common array-like structures in Art.
+template<typename T>
+class ArraySlice {
+ public:
+  // Create an empty array slice.
+  ArraySlice() : array_(nullptr), size_(0), element_size_(0) {}
+
+  // Create an array slice of the first 'length' elements of the array, with each element being
+  // element_size bytes long.
+  ArraySlice(T* array,
+             size_t length,
+             size_t element_size = sizeof(T))
+      : array_(array),
+        size_(dchecked_integral_cast<uint32_t>(length)),
+        element_size_(element_size) {
+    DCHECK(array_ != nullptr || length == 0);
+  }
+
+  // Create an array slice of the elements between start_offset and end_offset of the array with
+  // each element being element_size bytes long. Both start_offset and end_offset are in
+  // element_size units.
+  ArraySlice(T* array,
+             uint32_t start_offset,
+             uint32_t end_offset,
+             size_t element_size = sizeof(T))
+      : array_(nullptr),
+        size_(end_offset - start_offset),
+        element_size_(element_size) {
+    DCHECK(array != nullptr || size_ == 0);
+    DCHECK_LE(start_offset, end_offset);
+    if (size_ != 0) {
+      uintptr_t offset = start_offset * element_size_;
+      array_ = reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(array) + offset);
+    }
+  }
+
+  // Create an array slice of the elements between start_offset and end_offset of the array with
+  // each element being element_size bytes long and having the given alignment. Both start_offset
+  // and end_offset are in element_size units.
+  ArraySlice(LengthPrefixedArray<T>* array,
+             uint32_t start_offset,
+             uint32_t end_offset,
+             size_t element_size = sizeof(T),
+             size_t alignment = alignof(T))
+      : array_(nullptr),
+        size_(end_offset - start_offset),
+        element_size_(element_size) {
+    DCHECK(array != nullptr || size_ == 0);
+    if (size_ != 0) {
+      DCHECK_LE(start_offset, end_offset);
+      DCHECK_LE(start_offset, array->size());
+      DCHECK_LE(end_offset, array->size());
+      array_ = &array->At(start_offset, element_size_, alignment);
+    }
+  }
+
+  T& At(size_t index) {
+    DCHECK_LT(index, size_);
+    return AtUnchecked(index);
+  }
+
+  const T& At(size_t index) const {
+    DCHECK_LT(index, size_);
+    return AtUnchecked(index);
+  }
+
+  T& operator[](size_t index) {
+    return At(index);
+  }
+
+  const T& operator[](size_t index) const {
+    return At(index);
+  }
+
+  StrideIterator<T> begin() {
+    return StrideIterator<T>(&AtUnchecked(0), element_size_);
+  }
+
+  StrideIterator<const T> begin() const {
+    return StrideIterator<const T>(&AtUnchecked(0), element_size_);
+  }
+
+  StrideIterator<T> end() {
+    return StrideIterator<T>(&AtUnchecked(size_), element_size_);
+  }
+
+  StrideIterator<const T> end() const {
+    return StrideIterator<const T>(&AtUnchecked(size_), element_size_);
+  }
+
+  IterationRange<StrideIterator<T>> AsRange() {
+    return size() != 0 ? MakeIterationRange(begin(), end())
+                       : MakeEmptyIterationRange(StrideIterator<T>(nullptr, 0));
+  }
+
+  size_t size() const {
+    return size_;
+  }
+
+  size_t ElementSize() const {
+    return element_size_;
+  }
+
+ private:
+  T& AtUnchecked(size_t index) {
+    return *reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(array_) + index * element_size_);
+  }
+
+  const T& AtUnchecked(size_t index) const {
+    return *reinterpret_cast<const T*>(reinterpret_cast<uintptr_t>(array_) + index * element_size_);
+  }
+
+  T* array_;
+  size_t size_;
+  size_t element_size_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_ARRAY_SLICE_H_
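
A short usage sketch for ArraySlice: the element type is decoupled from the stride, which is what lets callers walk ArtMethod arrays whose per-element size depends on the image pointer size. The record layout below is hypothetical:

    #include <cstdint>
    #include <cstdio>

    #include "base/array_slice.h"

    struct Header { uint32_t id; };  // Each 8-byte record starts with a Header.

    int main() {
      alignas(8) uint32_t storage[8] = {1, 0, 2, 0, 3, 0, 4, 0};
      // Four elements spaced 8 bytes apart; the slice strides by element_size,
      // not by sizeof(Header).
      art::ArraySlice<Header> slice(reinterpret_cast<Header*>(storage),
                                    /* length */ 4u,
                                    /* element_size */ 8u);
      for (Header& h : slice) {
        std::printf("%u\n", h.id);  // Prints 1, 2, 3, 4.
      }
      return 0;
    }
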
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index d6a44f7..8430d68 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -23,6 +23,7 @@
 
 #include "base/logging.h"
 #include "base/iteration_range.h"
+#include "base/stl_util.h"
 
 namespace art {
 
@@ -108,12 +109,12 @@
 }
 
 // For rounding integers.
-// NOTE: In the absence of std::omit_from_type_deduction<T> or std::identity<T>, use std::decay<T>.
+// Note: Omit the `n` from T type deduction, deduce only from the `x` argument.
 template<typename T>
-static constexpr T RoundDown(T x, typename std::decay<T>::type n) WARN_UNUSED;
+static constexpr T RoundDown(T x, typename Identity<T>::type n) WARN_UNUSED;
 
 template<typename T>
-static constexpr T RoundDown(T x, typename std::decay<T>::type n) {
+static constexpr T RoundDown(T x, typename Identity<T>::type n) {
   return
       DCHECK_CONSTEXPR(IsPowerOfTwo(n), , T(0))
       (x & -n);
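
Both std::decay<T>::type and Identity<T>::type are non-deduced contexts, so behavior is unchanged; Identity simply states the intent without also stripping cv-qualifiers and references. A self-contained sketch of why suppressing deduction on `n` matters:

    #include <cstdint>

    template <typename T>
    struct Identity { using type = T; };

    // `typename Identity<T>::type` is a non-deduced context, so T is deduced
    // from `x` alone and `n` merely converts to T.
    template <typename T>
    constexpr T RoundDown(T x, typename Identity<T>::type n) {
      return x & -n;
    }

    // Without the wrapper, T would be deduced as uint64_t from `x` but as int
    // from the literal 8, which is a deduction failure.
    static_assert(RoundDown(uint64_t{0x12345}, 8) == uint64_t{0x12340}, "");

    int main() { return 0; }
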
diff --git a/runtime/base/dchecked_vector.h b/runtime/base/dchecked_vector.h
index 2bd12df..51dfba8 100644
--- a/runtime/base/dchecked_vector.h
+++ b/runtime/base/dchecked_vector.h
@@ -33,7 +33,7 @@
 // but we do not use exceptions, so this accessor is deliberately hidden.
 // Note: The common pattern &v[0] used to retrieve pointer to the data is not
 // valid for an empty dchecked_vector<>. Use data() to avoid checking empty().
-template <typename T, typename Alloc>
+template <typename T, typename Alloc = std::allocator<T>>
 class dchecked_vector : private std::vector<T, Alloc> {
  private:
   // std::vector<> has a slightly different specialization for bool. We don't provide that.
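
With the defaulted allocator argument, dchecked_vector can now be spelled like std::vector. A trivial check, assuming the usual vector surface is forwarded:

    #include "base/dchecked_vector.h"

    int main() {
      // Before this change, the allocator argument was mandatory:
      //   art::dchecked_vector<int, std::allocator<int>> v;
      art::dchecked_vector<int> v;
      v.push_back(42);
      return v[0] == 42 ? 0 : 1;
    }
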
diff --git a/runtime/base/file_magic.cc b/runtime/base/file_magic.cc
new file mode 100644
index 0000000..9756338
--- /dev/null
+++ b/runtime/base/file_magic.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "file_magic.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "base/logging.h"
+#include "dex_file.h"
+#include "stringprintf.h"
+
+namespace art {
+
+ScopedFd OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg) {
+  CHECK(magic != nullptr);
+  ScopedFd fd(open(filename, O_RDONLY, 0));
+  if (fd.get() == -1) {
+    *error_msg = StringPrintf("Unable to open '%s' : %s", filename, strerror(errno));
+    return ScopedFd();
+  }
+  int n = TEMP_FAILURE_RETRY(read(fd.get(), magic, sizeof(*magic)));
+  if (n != sizeof(*magic)) {
+    *error_msg = StringPrintf("Failed to find magic in '%s'", filename);
+    return ScopedFd();
+  }
+  if (lseek(fd.get(), 0, SEEK_SET) != 0) {
+    *error_msg = StringPrintf("Failed to seek to beginning of file '%s' : %s", filename,
+                              strerror(errno));
+    return ScopedFd();
+  }
+  return fd;
+}
+
+bool IsZipMagic(uint32_t magic) {
+  return (('P' == ((magic >> 0) & 0xff)) &&
+          ('K' == ((magic >> 8) & 0xff)));
+}
+
+bool IsDexMagic(uint32_t magic) {
+  return DexFile::IsMagicValid(reinterpret_cast<const uint8_t*>(&magic));
+}
+
+}  // namespace art
diff --git a/runtime/base/file_magic.h b/runtime/base/file_magic.h
new file mode 100644
index 0000000..f7e4bad
--- /dev/null
+++ b/runtime/base/file_magic.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_FILE_MAGIC_H_
+#define ART_RUNTIME_BASE_FILE_MAGIC_H_
+
+#include <stdint.h>
+#include <string>
+
+#include "ScopedFd.h"
+
+namespace art {
+
+// Open a file and read its magic number.
+ScopedFd OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg);
+
+// Check whether the given magic matches a known file type.
+bool IsZipMagic(uint32_t magic);
+bool IsDexMagic(uint32_t magic);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_FILE_MAGIC_H_
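
The intended call pattern: read the magic, classify the file, and keep using the descriptor, which OpenAndReadMagic rewinds to offset zero on success. A sketch (the argv handling is illustrative):

    #include <cstdio>
    #include <string>

    #include "base/file_magic.h"

    int main(int argc, char** argv) {
      if (argc != 2) {
        return 2;
      }
      uint32_t magic = 0;
      std::string error_msg;
      ScopedFd fd(art::OpenAndReadMagic(argv[1], &magic, &error_msg));
      if (fd.get() == -1) {
        std::fprintf(stderr, "%s\n", error_msg.c_str());
        return 1;
      }
      // The fd is positioned at the start of the file again, so a zip or dex
      // parser can take over from here.
      std::printf("zip=%d dex=%d\n", art::IsZipMagic(magic), art::IsDexMagic(magic));
      return 0;
    }
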
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
index 95baa82..fc1a52f 100644
--- a/runtime/base/hash_set.h
+++ b/runtime/base/hash_set.h
@@ -236,7 +236,7 @@
 
   // Returns how large the table is after being written. If ptr is null, then no writing happens
   // but the size is still returned. The ptr must be 8-byte aligned.
-  size_t WriteToMemory(uint8_t* ptr) {
+  size_t WriteToMemory(uint8_t* ptr) const {
     size_t offset = 0;
     offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_));
     offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_));
@@ -457,7 +457,7 @@
   }
 
   // Make sure that everything reinserts in the right spot. Returns the number of errors.
-  size_t Verify() {
+  size_t Verify() NO_THREAD_SAFETY_ANALYSIS {
     size_t errors = 0;
     for (size_t i = 0; i < num_buckets_; ++i) {
       T& element = data_[i];
diff --git a/runtime/length_prefixed_array.h b/runtime/base/length_prefixed_array.h
similarity index 95%
rename from runtime/length_prefixed_array.h
rename to runtime/base/length_prefixed_array.h
index e01b6cc..d632871 100644
--- a/runtime/length_prefixed_array.h
+++ b/runtime/base/length_prefixed_array.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_LENGTH_PREFIXED_ARRAY_H_
-#define ART_RUNTIME_LENGTH_PREFIXED_ARRAY_H_
+#ifndef ART_RUNTIME_BASE_LENGTH_PREFIXED_ARRAY_H_
+#define ART_RUNTIME_BASE_LENGTH_PREFIXED_ARRAY_H_
 
 #include <stddef.h>  // for offsetof()
 
@@ -110,4 +110,4 @@
 
 }  // namespace art
 
-#endif  // ART_RUNTIME_LENGTH_PREFIXED_ARRAY_H_
+#endif  // ART_RUNTIME_BASE_LENGTH_PREFIXED_ARRAY_H_
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 2cd1a4d..115c260 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -48,6 +48,7 @@
   bool oat;
   bool profiler;
   bool signals;
+  bool simulator;
   bool startup;
   bool third_party_jni;  // Enabled with "-verbose:third-party-jni".
   bool threads;
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index d4c9057..263f50d 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -63,6 +63,9 @@
   kLambdaTableLock,
   kJdwpSocketLock,
   kRegionSpaceRegionLock,
+  kRosAllocGlobalLock,
+  kRosAllocBracketLock,
+  kRosAllocBulkFreeLock,
   kTransactionLogLock,
   kMarkSweepMarkStackLock,
   kJniWeakGlobalsLock,
@@ -73,9 +76,6 @@
   kReferenceQueueClearedReferencesLock,
   kReferenceProcessorLock,
   kJitCodeCacheLock,
-  kRosAllocGlobalLock,
-  kRosAllocBracketLock,
-  kRosAllocBulkFreeLock,
   kAllocSpaceLock,
   kBumpPointerSpaceBlockLock,
   kArenaPoolLock,
diff --git a/runtime/base/stl_util.h b/runtime/base/stl_util.h
index 0949619..ad03c31 100644
--- a/runtime/base/stl_util.h
+++ b/runtime/base/stl_util.h
@@ -149,6 +149,30 @@
   return it != container.end();
 }
 
+// const char* compare function suitable for std::map or std::set.
+struct CStringLess {
+  bool operator()(const char* lhs, const char* rhs) const {
+    return strcmp(lhs, rhs) < 0;
+  }
+};
+
+// Use to suppress type deduction for a function argument.
+// See std::identity<> for more background:
+// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1856.html#20.2.2 - move/forward helpers
+//
+// e.g. "template <typename X> void bar(typename Identity<X>::type foo);
+//     bar(5); // compilation error
+//     bar<int>(5); // ok
+// or "template <typename T> void foo(T* x, typename Identity<T*>::type y);
+//     Base b;
+//     Derived d;
+//     foo(&b, &d);  // Use implicit Derived* -> Base* conversion.
+// If T was deduced from both &b and &d, there would be a mismatch, i.e. deduction failure.
+template <typename T>
+struct Identity {
+  using type = T;
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_STL_UTIL_H_
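
CStringLess orders const char* keys by contents rather than by pointer value; a small sketch:

    #include <set>

    #include "base/stl_util.h"

    int main() {
      // Without CStringLess, const char* keys would be ordered (and
      // deduplicated) by pointer identity, not by string contents.
      std::set<const char*, art::CStringLess> descriptors;
      descriptors.insert("Ljava/lang/Object;");
      descriptors.insert("Ljava/lang/String;");
      return descriptors.count("Ljava/lang/String;") == 1u ? 0 : 1;
    }
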
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 07cadc4..78bc3d5 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -17,12 +17,22 @@
 #include "base/unix_file/fd_file.h"
 
 #include <errno.h>
+#include <limits>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
 
 #include "base/logging.h"
 
+// Includes needed for FdFile::Copy().
+#ifdef __linux__
+#include <sys/sendfile.h>
+#else
+#include <algorithm>
+#include "base/stl_util.h"
+#include "globals.h"
+#endif
+
 namespace unix_file {
 
 FdFile::FdFile() : guard_state_(GuardState::kClosed), fd_(-1), auto_close_(true) {
@@ -222,6 +232,52 @@
   return true;
 }
 
+bool FdFile::Copy(FdFile* input_file, int64_t offset, int64_t size) {
+  off_t off = static_cast<off_t>(offset);
+  off_t sz = static_cast<off_t>(size);
+  if (offset < 0 || static_cast<int64_t>(off) != offset ||
+      size < 0 || static_cast<int64_t>(sz) != size ||
+      sz > std::numeric_limits<off_t>::max() - off) {
+    errno = EINVAL;
+    return false;
+  }
+  if (size == 0) {
+    return true;
+  }
+#ifdef __linux__
+  // Use sendfile(), available for file-to-file copies since Linux kernel 2.6.33.
+  off_t end = off + sz;
+  while (off != end) {
+    int result = TEMP_FAILURE_RETRY(
+        sendfile(Fd(), input_file->Fd(), &off, end - off));
+    if (result == -1) {
+      return false;
+    }
+    // Ignore the number of bytes in `result`; sendfile() has already updated `off`.
+  }
+#else
+  if (lseek(input_file->Fd(), off, SEEK_SET) != off) {
+    return false;
+  }
+  constexpr size_t kMaxBufferSize = 4 * ::art::kPageSize;
+  const size_t buffer_size = std::min<uint64_t>(size, kMaxBufferSize);
+  art::UniqueCPtr<void> buffer(malloc(buffer_size));
+  if (buffer == nullptr) {
+    errno = ENOMEM;
+    return false;
+  }
+  while (size != 0) {
+    size_t chunk_size = std::min<uint64_t>(buffer_size, size);
+    if (!input_file->ReadFully(buffer.get(), chunk_size) ||
+        !WriteFully(buffer.get(), chunk_size)) {
+      return false;
+    }
+    size -= chunk_size;
+  }
+#endif
+  return true;
+}
+
 void FdFile::Erase() {
   TEMP_FAILURE_RETRY(SetLength(0));
   TEMP_FAILURE_RETRY(Flush());
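
A usage sketch for the new FdFile::Copy() (paths are illustrative): copy a byte range from one open file into another, via sendfile() on Linux and a bounded read/write loop elsewhere. The input is assumed to hold at least `size` bytes:

    #include <fcntl.h>

    #include "base/unix_file/fd_file.h"

    int main() {
      unix_file::FdFile src;
      unix_file::FdFile dst;
      if (!src.Open("/tmp/input.bin", O_RDONLY) ||
          !dst.Open("/tmp/output.bin", O_RDWR | O_CREAT, 0644)) {
        return 1;
      }
      // Copy the first 4 KiB of `src` into `dst` at its current position.
      // Copy() fails with EINVAL on negative or overflowing offset/size.
      if (!dst.Copy(&src, /* offset */ 0, /* size */ 4096)) {
        return 1;
      }
      return (dst.Flush() == 0 && dst.Close() == 0 && src.Close() == 0) ? 0 : 1;
    }
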
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index f47368b..231a1ab 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -50,12 +50,12 @@
   bool Open(const std::string& file_path, int flags, mode_t mode);
 
   // RandomAccessFile API.
-  virtual int Close() WARN_UNUSED;
-  virtual int64_t Read(char* buf, int64_t byte_count, int64_t offset) const WARN_UNUSED;
-  virtual int SetLength(int64_t new_length) WARN_UNUSED;
-  virtual int64_t GetLength() const;
-  virtual int64_t Write(const char* buf, int64_t byte_count, int64_t offset) WARN_UNUSED;
-  virtual int Flush() WARN_UNUSED;
+  int Close() OVERRIDE WARN_UNUSED;
+  int64_t Read(char* buf, int64_t byte_count, int64_t offset) const OVERRIDE WARN_UNUSED;
+  int SetLength(int64_t new_length) OVERRIDE WARN_UNUSED;
+  int64_t GetLength() const OVERRIDE;
+  int64_t Write(const char* buf, int64_t byte_count, int64_t offset) OVERRIDE WARN_UNUSED;
+  int Flush() OVERRIDE WARN_UNUSED;
 
   // Short for SetLength(0); Flush(); Close();
   void Erase();
@@ -77,6 +77,9 @@
   bool PreadFully(void* buffer, size_t byte_count, size_t offset) WARN_UNUSED;
   bool WriteFully(const void* buffer, size_t byte_count) WARN_UNUSED;
 
+  // Copy data from another file.
+  bool Copy(FdFile* input_file, int64_t offset, int64_t size);
+
   // This enum is public so that we can define the << operator over it.
   enum class GuardState {
     kBase,           // Base, file has not been flushed or closed.
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index 388f717..ecf607c 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -110,4 +110,34 @@
   ASSERT_EQ(file.Close(), 0);
 }
 
+TEST_F(FdFileTest, Copy) {
+  art::ScratchFile src_tmp;
+  FdFile src;
+  ASSERT_TRUE(src.Open(src_tmp.GetFilename(), O_RDWR));
+  ASSERT_GE(src.Fd(), 0);
+  ASSERT_TRUE(src.IsOpened());
+
+  char src_data[] = "Some test data.";
+  ASSERT_TRUE(src.WriteFully(src_data, sizeof(src_data)));  // Including the zero terminator.
+  ASSERT_EQ(0, src.Flush());
+  ASSERT_EQ(static_cast<int64_t>(sizeof(src_data)), src.GetLength());
+
+  art::ScratchFile dest_tmp;
+  FdFile dest;
+  ASSERT_TRUE(dest.Open(dest_tmp.GetFilename(), O_RDWR));
+  ASSERT_GE(dest.Fd(), 0);
+  ASSERT_TRUE(dest.IsOpened());
+
+  ASSERT_TRUE(dest.Copy(&src, 0, sizeof(src_data)));
+  ASSERT_EQ(0, dest.Flush());
+  ASSERT_EQ(static_cast<int64_t>(sizeof(src_data)), dest.GetLength());
+
+  char check_data[sizeof(src_data)];
+  ASSERT_TRUE(dest.PreadFully(check_data, sizeof(src_data), 0u));
+  CHECK_EQ(0, memcmp(check_data, src_data, sizeof(src_data)));
+
+  ASSERT_EQ(0, dest.Close());
+  ASSERT_EQ(0, src.Close());
+}
+
 }  // namespace unix_file
diff --git a/runtime/base/variant_map.h b/runtime/base/variant_map.h
index 82e5d2e..531cb37 100644
--- a/runtime/base/variant_map.h
+++ b/runtime/base/variant_map.h
@@ -19,8 +19,11 @@
 
 #include <memory.h>
 #include <map>
+#include <type_traits>
 #include <utility>
 
+#include "base/stl_util.h"
+
 namespace art {
 
 //
@@ -268,8 +271,9 @@
   }
 
   // Set a value for a given key, overwriting the previous value if any.
+  // Note: Omit the `value` from TValue type deduction, deduce only from the `key` argument.
   template <typename TValue>
-  void Set(const TKey<TValue>& key, const TValue& value) {
+  void Set(const TKey<TValue>& key, const typename Identity<TValue>::type& value) {
     // Clone the value first, to protect against &value == GetValuePtr(key).
     auto* new_value = new TValue(value);
 
@@ -279,8 +283,9 @@
 
   // Set a value for a given key, only if there was no previous value before.
   // Returns true if the value was set, false if a previous value existed.
+  // Note: Omit the `value` from TValue type deduction, deduce only from the `key` argument.
   template <typename TValue>
-  bool SetIfMissing(const TKey<TValue>& key, const TValue& value) {
+  bool SetIfMissing(const TKey<TValue>& key, const typename Identity<TValue>::type& value) {
     TValue* ptr = Get(key);
     if (ptr == nullptr) {
       Set(key, value);
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 88a3996..ea1afa8 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -30,7 +30,7 @@
 namespace art {
 
 inline mirror::Class* ClassLinker::FindSystemClass(Thread* self, const char* descriptor) {
-  return FindClass(self, descriptor, NullHandle<mirror::ClassLoader>());
+  return FindClass(self, descriptor, ScopedNullHandle<mirror::ClassLoader>());
 }
 
 inline mirror::Class* ClassLinker::FindArrayClass(Thread* self, mirror::Class** element_class) {
@@ -116,6 +116,7 @@
   return resolved_method;
 }
 
+template <ClassLinker::ResolveMode kResolveMode>
 inline ArtMethod* ClassLinker::ResolveMethod(Thread* self,
                                              uint32_t method_idx,
                                              ArtMethod* referrer,
@@ -127,12 +128,12 @@
     Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
     Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
     const DexFile* dex_file = h_dex_cache->GetDexFile();
-    resolved_method = ResolveMethod(*dex_file,
-                                    method_idx,
-                                    h_dex_cache,
-                                    h_class_loader,
-                                    referrer,
-                                    type);
+    resolved_method = ResolveMethod<kResolveMode>(*dex_file,
+                                                  method_idx,
+                                                  h_dex_cache,
+                                                  h_class_loader,
+                                                  referrer,
+                                                  type);
   }
   // Note: We cannot check here to see whether we added the method to the cache. It
   //       might be an erroneous class, which results in it being hidden from us.
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index d5a5ea6..342e1d9 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -807,10 +807,7 @@
     auto* runtime = Runtime::Current();
     auto* image_space = runtime->GetHeap()->GetBootImageSpace();
     auto pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
-    for (auto& m : klass->GetDirectMethods(pointer_size)) {
-      SanityCheckArtMethod(&m, klass, image_space);
-    }
-    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+    for (auto& m : klass->GetMethods(pointer_size)) {
       SanityCheckArtMethod(&m, klass, image_space);
     }
     auto* vtable = klass->GetVTable();
@@ -1017,6 +1014,15 @@
   mirror::Throwable::SetClass(GetClassRoot(kJavaLangThrowable));
   mirror::StackTraceElement::SetClass(GetClassRoot(kJavaLangStackTraceElement));
 
+  const ImageHeader& header = space->GetImageHeader();
+  const ImageSection& section = header.GetImageSection(ImageHeader::kSectionClassTable);
+  if (section.Size() > 0u) {
+    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    ClassTable* const class_table = InsertClassTableForClassLoader(nullptr);
+    class_table->ReadFromMemory(space->Begin() + section.Offset());
+    dex_cache_boot_image_class_lookup_required_ = false;
+  }
+
   FinishInit(self);
 
   VLOG(startup) << "ClassLinker::InitFromImage exiting";
@@ -1433,8 +1439,12 @@
       if (klass != nullptr) {
         *result = EnsureResolved(self, descriptor, klass);
       } else {
-        *result = DefineClass(self, descriptor, hash, NullHandle<mirror::ClassLoader>(),
-                              *pair.first, *pair.second);
+        *result = DefineClass(self,
+                              descriptor,
+                              hash,
+                              ScopedNullHandle<mirror::ClassLoader>(),
+                              *pair.first,
+                              *pair.second);
       }
       if (*result == nullptr) {
         CHECK(self->IsExceptionPending()) << descriptor;
@@ -1559,7 +1569,11 @@
     // The boot class loader, search the boot class path.
     ClassPathEntry pair = FindInClassPath(descriptor, hash, boot_class_path_);
     if (pair.second != nullptr) {
-      return DefineClass(self, descriptor, hash, NullHandle<mirror::ClassLoader>(), *pair.first,
+      return DefineClass(self,
+                         descriptor,
+                         hash,
+                         ScopedNullHandle<mirror::ClassLoader>(),
+                         *pair.first,
                          *pair.second);
     } else {
       // The boot class loader is searched ahead of the application class loader, failures are
@@ -1868,12 +1882,10 @@
     // We're invoking a virtual method directly (thanks to sharpening), compute the oat_method_index
     // by search for its position in the declared virtual methods.
     oat_method_index = declaring_class->NumDirectMethods();
-    size_t end = declaring_class->NumVirtualMethods();
     bool found_virtual = false;
-    for (size_t i = 0; i < end; i++) {
+    for (ArtMethod& art_method : declaring_class->GetVirtualMethods(image_pointer_size_)) {
       // Check method index instead of identity in case of duplicate method definitions.
-      if (method->GetDexMethodIndex() ==
-          declaring_class->GetVirtualMethod(i, image_pointer_size_)->GetDexMethodIndex()) {
+      if (method->GetDexMethodIndex() == art_method.GetDexMethodIndex()) {
         found_virtual = true;
         break;
       }
@@ -2236,11 +2248,14 @@
     klass->SetIFieldsPtr(ifields);
     DCHECK_EQ(klass->NumInstanceFields(), num_ifields);
     // Load methods.
-    klass->SetDirectMethodsPtr(AllocArtMethodArray(self, allocator, it.NumDirectMethods()));
-    klass->SetVirtualMethodsPtr(AllocArtMethodArray(self, allocator, it.NumVirtualMethods()));
+    klass->SetMethodsPtr(
+        AllocArtMethodArray(self, allocator, it.NumDirectMethods() + it.NumVirtualMethods()),
+        it.NumDirectMethods(),
+        it.NumVirtualMethods());
     size_t class_def_method_index = 0;
     uint32_t last_dex_method_index = DexFile::kDexNoIndex;
     size_t last_class_def_method_index = 0;
+    // TODO These should really use the iterators.
     for (size_t i = 0; it.HasNextDirectMethod(); i++, it.Next()) {
       ArtMethod* method = klass->GetDirectMethodUnchecked(i, image_pointer_size_);
       LoadMethod(self, dex_file, it, klass, method);
@@ -2719,9 +2734,12 @@
   return nullptr;
 }
 
-void ClassLinker::UpdateClassVirtualMethods(mirror::Class* klass,
-                                            LengthPrefixedArray<ArtMethod>* new_methods) {
-  klass->SetVirtualMethodsPtr(new_methods);
+// TODO This should really be in mirror::Class.
+void ClassLinker::UpdateClassMethods(mirror::Class* klass,
+                                     LengthPrefixedArray<ArtMethod>* new_methods) {
+  klass->SetMethodsPtrUnchecked(new_methods,
+                                klass->NumDirectMethods(),
+                                klass->NumDeclaredVirtualMethods());
   // Need to mark the card so that the remembered sets and mod union tables get updated.
   Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(klass);
 }
@@ -2786,9 +2804,11 @@
   Thread* self = Thread::Current();
   WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
   ScopedAssertNoThreadSuspension ants(self, "Moving image classes to class table");
+
+  ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
+
   mirror::ObjectArray<mirror::DexCache>* dex_caches = GetImageDexCaches(image_space);
   std::string temp;
-  ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
     mirror::DexCache* dex_cache = dex_caches->Get(i);
     GcRoot<mirror::Class>* types = dex_cache->GetResolvedTypes();
@@ -3045,10 +3065,12 @@
   verifier::MethodVerifier::FailureKind verifier_failure = verifier::MethodVerifier::kNoFailure;
   std::string error_msg;
   if (!preverified) {
+    Runtime* runtime = Runtime::Current();
     verifier_failure = verifier::MethodVerifier::VerifyClass(self,
                                                              klass.Get(),
-                                                             Runtime::Current()->IsAotCompiler(),
-                                                             Runtime::Current()->IsAotCompiler(),
+                                                             runtime->GetCompilerCallbacks(),
+                                                             runtime->IsAotCompiler(),
+                                                             runtime->IsAotCompiler(),
                                                              &error_msg);
   }
   if (preverified || verifier_failure != verifier::MethodVerifier::kHardFailure) {
@@ -3197,11 +3219,8 @@
 
 void ClassLinker::ResolveClassExceptionHandlerTypes(const DexFile& dex_file,
                                                     Handle<mirror::Class> klass) {
-  for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
-    ResolveMethodExceptionHandlerTypes(dex_file, klass->GetDirectMethod(i, image_pointer_size_));
-  }
-  for (size_t i = 0; i < klass->NumVirtualMethods(); i++) {
-    ResolveMethodExceptionHandlerTypes(dex_file, klass->GetVirtualMethod(i, image_pointer_size_));
+  for (ArtMethod& method : klass->GetMethods(image_pointer_size_)) {
+    ResolveMethodExceptionHandlerTypes(dex_file, &method);
   }
 }
 
@@ -3289,29 +3308,30 @@
   throws_sfield.SetAccessFlags(kAccStatic | kAccPublic | kAccFinal);
 
   // Proxies have 1 direct method, the constructor
-  LengthPrefixedArray<ArtMethod>* directs = AllocArtMethodArray(self, allocator, 1);
-  // Currently AllocArtMethodArray cannot return null, but the OOM logic is left there in case we
-  // want to throw OOM in the future.
-  if (UNLIKELY(directs == nullptr)) {
-    self->AssertPendingOOMException();
-    return nullptr;
-  }
-  klass->SetDirectMethodsPtr(directs);
-  CreateProxyConstructor(klass, klass->GetDirectMethodUnchecked(0, image_pointer_size_));
+  const size_t num_direct_methods = 1;
 
-  // Create virtual method using specified prototypes.
+  // Proxies have as many virtual methods as there are entries in the supplied methods array.
   auto h_methods = hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Method>*>(methods));
   DCHECK_EQ(h_methods->GetClass(), mirror::Method::ArrayClass())
       << PrettyClass(h_methods->GetClass());
   const size_t num_virtual_methods = h_methods->GetLength();
-  auto* virtuals = AllocArtMethodArray(self, allocator, num_virtual_methods);
+
+  // Create the methods array.
+  LengthPrefixedArray<ArtMethod>* proxy_class_methods = AllocArtMethodArray(
+        self, allocator, num_direct_methods + num_virtual_methods);
   // Currently AllocArtMethodArray cannot return null, but the OOM logic is left there in case we
   // want to throw OOM in the future.
-  if (UNLIKELY(virtuals == nullptr)) {
+  if (UNLIKELY(proxy_class_methods == nullptr)) {
     self->AssertPendingOOMException();
     return nullptr;
   }
-  klass->SetVirtualMethodsPtr(virtuals);
+  klass->SetMethodsPtr(proxy_class_methods, num_direct_methods, num_virtual_methods);
+
+  // Create the single direct method.
+  CreateProxyConstructor(klass, klass->GetDirectMethodUnchecked(0, image_pointer_size_));
+
+  // Create virtual method using specified prototypes.
+  // TODO These should really use the iterators.
   for (size_t i = 0; i < num_virtual_methods; ++i) {
     auto* virtual_method = klass->GetVirtualMethodUnchecked(i, image_pointer_size_);
     auto* prototype = h_methods->Get(i)->GetArtMethod();
@@ -3421,7 +3441,7 @@
 
 void ClassLinker::CreateProxyConstructor(Handle<mirror::Class> klass, ArtMethod* out) {
   // Create constructor for Proxy that must initialize the method.
-  CHECK_EQ(GetClassRoot(kJavaLangReflectProxy)->NumDirectMethods(), 16u);
+  CHECK_EQ(GetClassRoot(kJavaLangReflectProxy)->NumDirectMethods(), 19u);
   ArtMethod* proxy_constructor = GetClassRoot(kJavaLangReflectProxy)->GetDirectMethodUnchecked(
       2, image_pointer_size_);
   // Ensure constructor is in dex cache so that we can use the dex cache to look up the overridden
@@ -4089,14 +4109,8 @@
   }
 
   DCHECK_EQ(temp_class->NumDirectMethods(), 0u);
-  for (auto& method : new_class->GetDirectMethods(image_pointer_size_)) {
-    if (method.GetDeclaringClass() == temp_class) {
-      method.SetDeclaringClass(new_class);
-    }
-  }
-
   DCHECK_EQ(temp_class->NumVirtualMethods(), 0u);
-  for (auto& method : new_class->GetVirtualMethods(image_pointer_size_)) {
+  for (auto& method : new_class->GetMethods(image_pointer_size_)) {
     if (method.GetDeclaringClass() == temp_class) {
       method.SetDeclaringClass(new_class);
     }
@@ -4180,8 +4194,7 @@
     // ArtMethod array pointers. If this occurs, it causes bugs in remembered sets since the GC
     // may not see any references to the target space and clean the card for a class if another
     // class had the same array pointer.
-    klass->SetDirectMethodsPtrUnchecked(nullptr);
-    klass->SetVirtualMethodsPtr(nullptr);
+    klass->SetMethodsPtrUnchecked(nullptr, 0, 0);
     klass->SetSFieldsPtrUnchecked(nullptr);
     klass->SetIFieldsPtrUnchecked(nullptr);
     if (UNLIKELY(h_new_class.Get() == nullptr)) {
@@ -4946,12 +4959,10 @@
   for (size_t k = ifstart + 1; k < iftable_count; k++) {
     // Skip ifstart since our current interface obviously cannot override itself.
     current_iface.Assign(iftable->GetInterface(k));
-    size_t num_instance_methods = current_iface->NumVirtualMethods();
-    // Iterate through every method on this interface. The order does not matter so we go forwards.
-    for (size_t m = 0; m < num_instance_methods; m++) {
-      ArtMethod* current_method = current_iface->GetVirtualMethodUnchecked(m, image_pointer_size);
+    // Iterate through every method on this interface. The order does not matter.
+    for (ArtMethod& current_method : current_iface->GetDeclaredVirtualMethods(image_pointer_size)) {
       if (UNLIKELY(target.HasSameNameAndSignature(
-                      current_method->GetInterfaceMethodIfProxy(image_pointer_size)))) {
+                      current_method.GetInterfaceMethodIfProxy(image_pointer_size)))) {
         // Check if the i'th interface is a subtype of this one.
         if (iface->IsAssignableFrom(current_iface.Get())) {
           return true;
@@ -5004,10 +5015,9 @@
     DCHECK_LT(k, iftable->Count());
 
     iface.Assign(iftable->GetInterface(k));
-    size_t num_instance_methods = iface->NumVirtualMethods();
-    // Iterate through every method on this interface. The order does not matter so we go forwards.
-    for (size_t m = 0; m < num_instance_methods; m++) {
-      ArtMethod* current_method = iface->GetVirtualMethodUnchecked(m, image_pointer_size_);
+    // Iterate through every declared method on this interface. The order does not matter.
+    for (auto& method_iter : iface->GetDeclaredVirtualMethods(image_pointer_size_)) {
+      ArtMethod* current_method = &method_iter;
       // Skip abstract methods and methods with different names.
       if (current_method->IsAbstract() ||
           !target_name_comparator.HasSameNameAndSignature(
@@ -5314,6 +5324,26 @@
   return nullptr;
 }
 
+static void SanityCheckVTable(Handle<mirror::Class> klass, uint32_t pointer_size)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::PointerArray* check_vtable = klass->GetVTableDuringLinking();
+  mirror::Class* superclass = (klass->HasSuperClass()) ? klass->GetSuperClass() : nullptr;
+  int32_t super_vtable_length = (superclass != nullptr) ? superclass->GetVTableLength() : 0;
+  for (int32_t i = 0; i < check_vtable->GetLength(); ++i) {
+    ArtMethod* m = check_vtable->GetElementPtrSize<ArtMethod*>(i, pointer_size);
+    CHECK(m != nullptr);
+
+    ArraySlice<ArtMethod> virtuals = klass->GetVirtualMethodsSliceUnchecked(pointer_size);
+    auto is_same_method = [m] (const ArtMethod& meth) {
+      return &meth == m;
+    };
+    CHECK((super_vtable_length > i && superclass->GetVTableEntry(i, pointer_size) == m) ||
+          std::find_if(virtuals.begin(), virtuals.end(), is_same_method) != virtuals.end())
+        << "While linking class '" << PrettyClass(klass.Get()) << "' unable to find owning class "
+        << "of '" << PrettyMethod(m) << "' (vtable index: " << i << ").";
+  }
+}
+
 bool ClassLinker::LinkInterfaceMethods(
     Thread* self,
     Handle<mirror::Class> klass,
@@ -5436,25 +5466,31 @@
       const bool super_interface = is_super && extend_super_iftable;
       auto method_array(hs2.NewHandle(iftable->GetMethodArray(i)));
 
-      LengthPrefixedArray<ArtMethod>* input_virtual_methods = nullptr;
-      Handle<mirror::PointerArray> input_vtable_array = NullHandle<mirror::PointerArray>();
+      ArraySlice<ArtMethod> input_virtual_methods;
+      ScopedNullHandle<mirror::PointerArray> null_handle;
+      Handle<mirror::PointerArray> input_vtable_array(null_handle);
       int32_t input_array_length = 0;
+
       // TODO Cleanup Needed: In the presence of default methods this optimization is rather dirty
       //      and confusing. Default methods should always look through all the superclasses
       //      because they are the last choice of an implementation. We get around this by looking
       //      at the super-classes iftable methods (copied into method_array previously) when we are
       //      looking for the implementation of a super-interface method but that is rather dirty.
+      bool using_virtuals;
       if (super_interface) {
-        // We are overwriting a super class interface, try to only virtual methods instead of the
+        // If we are overwriting a super class interface, try to use only virtual methods instead of the
         // whole vtable.
-        input_virtual_methods = klass->GetVirtualMethodsPtr();
-        input_array_length = klass->NumVirtualMethods();
+        using_virtuals = true;
+        input_virtual_methods = klass->GetDeclaredMethodsSlice(image_pointer_size_);
+        input_array_length = input_virtual_methods.size();
       } else {
-        // A new interface, we need the whole vtable in case a new interface method is implemented
-        // in the whole superclass.
+        // For a new interface, however, we need the whole vtable in case a new
+        // interface method is implemented in the whole superclass.
+        using_virtuals = false;
         input_vtable_array = vtable;
         input_array_length = input_vtable_array->GetLength();
       }
+
       // For each method in interface
       for (size_t j = 0; j < num_methods; ++j) {
         auto* interface_method = iftable->GetInterface(i)->GetVirtualMethod(j, image_pointer_size_);
@@ -5475,8 +5511,8 @@
         bool found_impl = false;
         ArtMethod* vtable_impl = nullptr;
         for (int32_t k = input_array_length - 1; k >= 0; --k) {
-          ArtMethod* vtable_method = input_virtual_methods != nullptr ?
-              &input_virtual_methods->At(k, method_size, method_alignment) :
+          ArtMethod* vtable_method = using_virtuals ?
+              &input_virtual_methods[k] :
               input_vtable_array->GetElementPtrSize<ArtMethod*>(k, image_pointer_size_);
           ArtMethod* vtable_method_for_name_comparison =
               vtable_method->GetInterfaceMethodIfProxy(image_pointer_size_);
@@ -5637,38 +5673,39 @@
     VLOG(class_linker) << PrettyClass(klass.Get()) << ": miranda_methods=" << miranda_methods.size()
                        << " default_methods=" << default_methods.size()
                        << " default_conflict_methods=" << default_conflict_methods.size();
-    const size_t old_method_count = klass->NumVirtualMethods();
+    const size_t old_method_count = klass->NumMethods();
     const size_t new_method_count = old_method_count +
                                     miranda_methods.size() +
                                     default_methods.size() +
                                     default_conflict_methods.size();
     // Attempt to realloc to save RAM if possible.
-    LengthPrefixedArray<ArtMethod>* old_virtuals = klass->GetVirtualMethodsPtr();
-    // The Realloced virtual methods aren't visiblef from the class roots, so there is no issue
+    LengthPrefixedArray<ArtMethod>* old_methods = klass->GetMethodsPtr();
+    // The Realloced virtual methods aren't visible from the class roots, so there is no issue
     // where GCs could attempt to mark stale pointers due to memcpy. And since we overwrite the
     // realloced memory with out->CopyFrom, we are guaranteed to have objects in the to space since
     // CopyFrom has internal read barriers.
-    const size_t old_size = old_virtuals != nullptr
-        ? LengthPrefixedArray<ArtMethod>::ComputeSize(old_method_count,
-                                                      method_size,
-                                                      method_alignment)
-        : 0u;
+    //
+    // TODO We should maybe move some of this into mirror::Class or at least into another method.
+    const size_t old_size = LengthPrefixedArray<ArtMethod>::ComputeSize(old_method_count,
+                                                                        method_size,
+                                                                        method_alignment);
     const size_t new_size = LengthPrefixedArray<ArtMethod>::ComputeSize(new_method_count,
                                                                         method_size,
                                                                         method_alignment);
-    auto* virtuals = reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
-        runtime->GetLinearAlloc()->Realloc(self, old_virtuals, old_size, new_size));
-    if (UNLIKELY(virtuals == nullptr)) {
+    const size_t old_methods_ptr_size = (old_methods != nullptr) ? old_size : 0;
+    auto* methods = reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
+        runtime->GetLinearAlloc()->Realloc(self, old_methods, old_methods_ptr_size, new_size));
+    if (UNLIKELY(methods == nullptr)) {
       self->AssertPendingOOMException();
       self->EndAssertNoThreadSuspension(old_cause);
       return false;
     }
     ScopedArenaUnorderedMap<ArtMethod*, ArtMethod*> move_table(allocator.Adapter());
-    if (virtuals != old_virtuals) {
+    if (methods != old_methods) {
       // Maps from heap allocated miranda method to linear alloc miranda method.
-      StrideIterator<ArtMethod> out = virtuals->begin(method_size, method_alignment);
-      // Copy over the old methods + miranda methods.
-      for (auto& m : klass->GetVirtualMethods(image_pointer_size_)) {
+      StrideIterator<ArtMethod> out = methods->begin(method_size, method_alignment);
+      // Copy over the old methods.
+      for (auto& m : klass->GetMethods(image_pointer_size_)) {
         move_table.emplace(&m, &*out);
         // The CopyFrom is only necessary to not miss read barriers since Realloc won't do read
         // barriers when it copies.
@@ -5676,8 +5713,7 @@
         ++out;
       }
     }
-    StrideIterator<ArtMethod> out(virtuals->begin(method_size, method_alignment)
-                                      + old_method_count);
+    StrideIterator<ArtMethod> out(methods->begin(method_size, method_alignment) + old_method_count);
     // Copy over miranda methods before copying vtable since CopyOf may cause thread suspension and
     // we want the roots of the miranda methods to get visited.
     for (ArtMethod* mir_method : miranda_methods) {
@@ -5689,9 +5725,8 @@
       move_table.emplace(mir_method, &new_method);
       ++out;
     }
-    // We need to copy the default methods into our own virtual method table since the runtime
-    // requires that every method on a class's vtable be in that respective class's virtual method
-    // table.
+    // We need to copy the default methods into our own method table since the runtime requires that
+    // every method on a class's vtable be in that respective class's virtual method table.
     // NOTE This means that two classes might have the same implementation of a method from the same
     // interface but will have different ArtMethod*s for them. This also means we cannot compare a
     // default method found on a class with one found on the declaring interface directly and must
@@ -5725,8 +5760,8 @@
       move_table.emplace(conf_method, &new_method);
       ++out;
     }
-    virtuals->SetSize(new_method_count);
-    UpdateClassVirtualMethods(klass.Get(), virtuals);
+    methods->SetSize(new_method_count);
+    UpdateClassMethods(klass.Get(), methods);
     // Done copying methods, they are all roots in the class now, so we can end the no thread
     // suspension assert.
     self->EndAssertNoThreadSuspension(old_cause);
@@ -5742,7 +5777,7 @@
       self->AssertPendingOOMException();
       return false;
     }
-    out = virtuals->begin(method_size, method_alignment) + old_method_count;
+    out = methods->begin(method_size, method_alignment) + old_method_count;
     size_t vtable_pos = old_vtable_count;
     for (size_t i = old_method_count; i < new_method_count; ++i) {
       // Leave the declaring class alone as type indices are relative to it
@@ -5796,8 +5831,16 @@
       }
     }
 
+    if (kIsDebugBuild) {
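+      // None of the new vtable entries may still point at a moved (pre-realloc) method.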
+      for (size_t i = 0; i < new_vtable_count; ++i) {
+        CHECK(move_table.find(vtable->GetElementPtrSize<ArtMethod*>(i, image_pointer_size_)) ==
+              move_table.end());
+      }
+    }
+
     klass->SetVTable(vtable.Get());
-    // Go fix up all the stale miranda pointers.
+    // Go fix up all the stale (old miranda or default method) pointers.
+    // First do it on the iftable.
     for (size_t i = 0; i < ifcount; ++i) {
       for (size_t j = 0, count = iftable->GetMethodArrayCount(i); j < count; ++j) {
         auto* method_array = iftable->GetMethodArray(i);
@@ -5811,7 +5854,7 @@
         }
       }
     }
-    // Fix up IMT in case it has any miranda methods in it.
+    // Next, fix up the IMT.
     for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
       auto it = move_table.find(out_imt[i]);
       if (it != move_table.end()) {
@@ -5823,25 +5866,26 @@
       auto* resolved_methods = klass->GetDexCache()->GetResolvedMethods();
       for (size_t i = 0, count = klass->GetDexCache()->NumResolvedMethods(); i < count; ++i) {
         auto* m = mirror::DexCache::GetElementPtrSize(resolved_methods, i, image_pointer_size_);
-        // We don't remove default methods from the move table since we need them to update the
-        // vtable. Therefore just skip them for this check.
-        if (!m->IsDefault()) {
-          CHECK(move_table.find(m) == move_table.end()) << PrettyMethod(m);
-        }
+        CHECK(move_table.find(m) == move_table.end() ||
+              // The original versions of copied methods will still be present so allow those too.
+              // Note that if the first check passes, this might fail in GetDeclaringClass().
+              std::find_if(m->GetDeclaringClass()->GetMethods(image_pointer_size_).begin(),
+                           m->GetDeclaringClass()->GetMethods(image_pointer_size_).end(),
+                           [m] (ArtMethod& meth) {
+                             return &meth == m;
+                           }) != m->GetDeclaringClass()->GetMethods(image_pointer_size_).end())
+            << "Obsolete methods " << PrettyMethod(m) << " is in dex cache!";
       }
     }
-    // Put some random garbage in old virtuals to help find stale pointers.
-    if (virtuals != old_virtuals) {
-      memset(old_virtuals, 0xFEu, old_size);
+    // Put some random garbage in old methods to help find stale pointers.
+    if (methods != old_methods && old_methods != nullptr) {
+      memset(old_methods, 0xFEu, old_size);
     }
   } else {
     self->EndAssertNoThreadSuspension(old_cause);
   }
   if (kIsDebugBuild) {
-    auto* check_vtable = klass->GetVTableDuringLinking();
-    for (int i = 0; i < check_vtable->GetLength(); ++i) {
-      CHECK(check_vtable->GetElementPtrSize<ArtMethod*>(i, image_pointer_size_) != nullptr);
-    }
+    SanityCheckVTable(klass, image_pointer_size_);
   }
   return true;
 }
@@ -5916,6 +5960,20 @@
 
   // we want a relatively stable order so that adding new fields
   // minimizes disruption of C++ version such as Class and Method.
+  //
+  // The overall sort order is:
+  // 1) All object reference fields, sorted alphabetically.
+  // 2) All java long (64-bit) integer fields, sorted alphabetically.
+  // 3) All java double (64-bit) floating point fields, sorted alphabetically.
+  // 4) All java int (32-bit) integer fields, sorted alphabetically.
+  // 5) All java float (32-bit) floating point fields, sorted alphabetically.
+  // 6) All java char (16-bit) integer fields, sorted alphabetically.
+  // 7) All java short (16-bit) integer fields, sorted alphabetically.
+  // 8) All java boolean (8-bit) integer fields, sorted alphabetically.
+  // 9) All java byte (8-bit) integer fields, sorted alphabetically.
+  //
+  // Once the fields are sorted in this order we will attempt to fill any gaps that might be present
+  // in the memory layout of the structure. See ShuffleForward for how this is done.
   std::deque<ArtField*> grouped_and_sorted_fields;
   const char* old_no_suspend_cause = self->StartAssertNoThreadSuspension(
       "Naked ArtField references in deque");
@@ -6149,6 +6207,7 @@
   return resolved;
 }
 
+template <ClassLinker::ResolveMode kResolveMode>
 ArtMethod* ClassLinker::ResolveMethod(const DexFile& dex_file,
                                       uint32_t method_idx,
                                       Handle<mirror::DexCache> dex_cache,
@@ -6160,6 +6219,12 @@
   ArtMethod* resolved = dex_cache->GetResolvedMethod(method_idx, image_pointer_size_);
   if (resolved != nullptr && !resolved->IsRuntimeMethod()) {
     DCHECK(resolved->GetDeclaringClassUnchecked() != nullptr) << resolved->GetDexMethodIndex();
+    if (kResolveMode == ClassLinker::kForceICCECheck) {
+      if (resolved->CheckIncompatibleClassChange(type)) {
+        ThrowIncompatibleClassChangeError(type, resolved->GetInvokeType(), resolved, referrer);
+        return nullptr;
+      }
+    }
     return resolved;
   }
   // Fail, get the declaring class.
@@ -6178,8 +6243,36 @@
       DCHECK(resolved == nullptr || resolved->GetDeclaringClassUnchecked() != nullptr);
       break;
     case kInterface:
-      resolved = klass->FindInterfaceMethod(dex_cache.Get(), method_idx, image_pointer_size_);
-      DCHECK(resolved == nullptr || resolved->GetDeclaringClass()->IsInterface());
+      // We have to check whether the method id really belongs to an interface (dex static bytecode
+      // constraint A15). Otherwise you must not invoke-interface on it.
+      //
+      // This is not symmetric to A12-A14 (direct, static, virtual), as using FindInterfaceMethod
+      // assumes that the given type is an interface, and will check the interface table if the
+      // method isn't declared in the class. So it may find an interface method (usually by name
+      // in the handling below, but we do the constraint check early). In that case,
+      // CheckIncompatibleClassChange will succeed (as it is called on an interface method)
+      // unexpectedly.
+      // Example:
+      //    interface I {
+      //      foo()
+      //    }
+      //    class A implements I {
+      //      ...
+      //    }
+      //    class B extends A {
+      //      ...
+      //    }
+      //    invoke-interface B.foo
+      //      -> FindInterfaceMethod finds I.foo (interface method), not A.foo (miranda method)
+      if (UNLIKELY(!klass->IsInterface())) {
+        ThrowIncompatibleClassChangeError(klass,
+                                          "Found class %s, but interface was expected",
+                                          PrettyDescriptor(klass).c_str());
+        return nullptr;
+      } else {
+        resolved = klass->FindInterfaceMethod(dex_cache.Get(), method_idx, image_pointer_size_);
+        DCHECK(resolved == nullptr || resolved->GetDeclaringClass()->IsInterface());
+      }
       break;
     case kSuper:  // Fall-through.
     case kVirtual:
@@ -6629,7 +6722,9 @@
   }
 }
 
-jobject ClassLinker::CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files) {
+jobject ClassLinker::CreatePathClassLoader(Thread* self,
+                                           std::vector<const DexFile*>& dex_files,
+                                           jobject parent_loader) {
   // SOAAlreadyRunnable is protected, and we need something to add a global reference.
   // We could move the jobject to the callers, but all call-sites do this...
   ScopedObjectAccessUnchecked soa(self);
@@ -6660,8 +6755,8 @@
   for (const DexFile* dex_file : dex_files) {
     StackHandleScope<3> hs2(self);
 
-    // CreatePathClassLoader is only used by gtests. Index 0 of h_long_array is supposed to be the
-    // oat file but we can leave it null.
+    // CreatePathClassLoader is only used by gtests and dex2oat. Index 0 of h_long_array is
+    // supposed to be the oat file but we can leave it null.
     Handle<mirror::LongArray> h_long_array = hs2.NewHandle(mirror::LongArray::Alloc(
         self,
         kDexFileIndexStart + 1));
@@ -6707,9 +6802,10 @@
       mirror::Class::FindField(self, hs.NewHandle(h_path_class_loader->GetClass()), "parent",
                                "Ljava/lang/ClassLoader;");
   DCHECK(parent_field != nullptr);
-  mirror::Object* boot_cl =
-      soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader)->AllocObject(self);
-  parent_field->SetObject<false>(h_path_class_loader.Get(), boot_cl);
+  mirror::Object* parent = (parent_loader != nullptr)
+      ? soa.Decode<mirror::ClassLoader*>(parent_loader)
+      : soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader)->AllocObject(self);
+  parent_field->SetObject<false>(h_path_class_loader.Get(), parent);
 
   // Make it a global ref and return.
   ScopedLocalRef<jobject> local_ref(
@@ -6778,4 +6874,20 @@
   }
 }
 
+// Instantiate ResolveMethod.
+template ArtMethod* ClassLinker::ResolveMethod<ClassLinker::kForceICCECheck>(
+    const DexFile& dex_file,
+    uint32_t method_idx,
+    Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader,
+    ArtMethod* referrer,
+    InvokeType type);
+template ArtMethod* ClassLinker::ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+    const DexFile& dex_file,
+    uint32_t method_idx,
+    Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader,
+    ArtMethod* referrer,
+    InvokeType type);
+
 }  // namespace art
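
A minimal caller-side sketch of the new template parameter (the surrounding variables are
assumed, not part of this change):

    // Hypothetical call site: force the ICCE check even on a dex cache hit.
    ArtMethod* method = class_linker->ResolveMethod<ClassLinker::kForceICCECheck>(
        *dex_file, method_idx, dex_cache, class_loader, /* referrer */ nullptr, kInterface);
    // kNoICCECheckForCache keeps the old behavior and trusts a cached result as-is.
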
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 5ba9652..f16fe92 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -246,11 +246,19 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
+  // Determine whether a dex cache result should be trusted, or an IncompatibleClassChangeError
+  // check should be performed even after a hit.
+  enum ResolveMode {  // private.
+    kNoICCECheckForCache,
+    kForceICCECheck
+  };
+
   // Resolve a method with a given ID from the DexFile, storing the
   // result in DexCache. The ClassLinker and ClassLoader are used as
   // in ResolveType. What is unique is the method type argument which
   // is used to determine if this method is a direct, static, or
   // virtual method.
+  template <ResolveMode kResolveMode>
   ArtMethod* ResolveMethod(const DexFile& dex_file,
                            uint32_t method_idx,
                            Handle<mirror::DexCache> dex_cache,
@@ -262,6 +270,7 @@
 
   ArtMethod* GetResolvedMethod(uint32_t method_idx, ArtMethod* referrer)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  template <ResolveMode kResolveMode>
   ArtMethod* ResolveMethod(Thread* self, uint32_t method_idx, ArtMethod* referrer, InvokeType type)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
@@ -514,7 +523,10 @@
 
   // Creates a GlobalRef PathClassLoader that can be used to load classes from the given dex files.
   // Note: the objects are not completely set up. Do not use this outside of tests and the compiler.
-  jobject CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files)
+  // If parent_loader is null then we use the boot class loader.
+  jobject CreatePathClassLoader(Thread* self,
+                                std::vector<const DexFile*>& dex_files,
+                                jobject parent_loader)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
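
A short usage sketch (caller context assumed); passing a null parent preserves the old
behavior of parenting the new PathClassLoader to the boot class loader:

    jobject loader =
        class_linker->CreatePathClassLoader(self, dex_files, /* parent_loader */ nullptr);
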
 
@@ -971,8 +983,8 @@
   bool CanWeInitializeClass(mirror::Class* klass, bool can_init_statics, bool can_init_parents)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void UpdateClassVirtualMethods(mirror::Class* klass,
-                                 LengthPrefixedArray<ArtMethod>* new_methods)
+  void UpdateClassMethods(mirror::Class* klass,
+                          LengthPrefixedArray<ArtMethod>* new_methods)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Locks::classlinker_classes_lock_);
 
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 2c086c5..99353c5 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -254,10 +254,20 @@
       EXPECT_EQ(klass.Get(), method.GetDeclaringClass());
     }
 
-    for (ArtMethod& method : klass->GetVirtualMethods(sizeof(void*))) {
+    for (ArtMethod& method : klass->GetDeclaredVirtualMethods(sizeof(void*))) {
       AssertMethod(&method);
       EXPECT_FALSE(method.IsDirect());
-      EXPECT_TRUE(method.GetDeclaringClass()->IsAssignableFrom(klass.Get()));
+      EXPECT_EQ(klass.Get(), method.GetDeclaringClass());
+    }
+
+    for (ArtMethod& method : klass->GetCopiedMethods(sizeof(void*))) {
+      AssertMethod(&method);
+      EXPECT_FALSE(method.IsDirect());
+      EXPECT_TRUE(method.IsMiranda() || method.IsDefault() || method.IsDefaultConflicting());
+      EXPECT_TRUE(method.GetDeclaringClass()->IsInterface())
+          << "declaring class: " << PrettyClass(method.GetDeclaringClass());
+      EXPECT_TRUE(method.GetDeclaringClass()->IsAssignableFrom(klass.Get()))
+          << "declaring class: " << PrettyClass(method.GetDeclaringClass());
     }
 
     for (size_t i = 0; i < klass->NumInstanceFields(); i++) {
@@ -491,18 +501,20 @@
 struct ClassOffsets : public CheckOffsets<mirror::Class> {
   ClassOffsets() : CheckOffsets<mirror::Class>(false, "Ljava/lang/Class;") {
     addOffset(OFFSETOF_MEMBER(mirror::Class, access_flags_), "accessFlags");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, annotation_type_), "annotationType");
     addOffset(OFFSETOF_MEMBER(mirror::Class, class_flags_), "classFlags");
     addOffset(OFFSETOF_MEMBER(mirror::Class, class_loader_), "classLoader");
     addOffset(OFFSETOF_MEMBER(mirror::Class, class_size_), "classSize");
     addOffset(OFFSETOF_MEMBER(mirror::Class, clinit_thread_id_), "clinitThreadId");
     addOffset(OFFSETOF_MEMBER(mirror::Class, component_type_), "componentType");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, copied_methods_offset_), "copiedMethodsOffset");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_cache_), "dexCache");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_cache_strings_), "dexCacheStrings");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_class_def_idx_), "dexClassDefIndex");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_type_idx_), "dexTypeIndex");
-    addOffset(OFFSETOF_MEMBER(mirror::Class, direct_methods_), "directMethods");
     addOffset(OFFSETOF_MEMBER(mirror::Class, ifields_), "iFields");
     addOffset(OFFSETOF_MEMBER(mirror::Class, iftable_), "ifTable");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, methods_), "methods");
     addOffset(OFFSETOF_MEMBER(mirror::Class, name_), "name");
     addOffset(OFFSETOF_MEMBER(mirror::Class, num_reference_instance_fields_),
               "numReferenceInstanceFields");
@@ -516,7 +528,7 @@
     addOffset(OFFSETOF_MEMBER(mirror::Class, status_), "status");
     addOffset(OFFSETOF_MEMBER(mirror::Class, super_class_), "superClass");
     addOffset(OFFSETOF_MEMBER(mirror::Class, verify_error_), "verifyError");
-    addOffset(OFFSETOF_MEMBER(mirror::Class, virtual_methods_), "virtualMethods");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, virtual_methods_offset_), "virtualMethodsOffset");
     addOffset(OFFSETOF_MEMBER(mirror::Class, vtable_), "vtable");
   };
 };
@@ -524,15 +536,15 @@
 struct StringOffsets : public CheckOffsets<mirror::String> {
   StringOffsets() : CheckOffsets<mirror::String>(false, "Ljava/lang/String;") {
     addOffset(OFFSETOF_MEMBER(mirror::String, count_), "count");
-    addOffset(OFFSETOF_MEMBER(mirror::String, hash_code_), "hashCode");
+    addOffset(OFFSETOF_MEMBER(mirror::String, hash_code_), "hash");
   };
 };
 
 struct ThrowableOffsets : public CheckOffsets<mirror::Throwable> {
   ThrowableOffsets() : CheckOffsets<mirror::Throwable>(false, "Ljava/lang/Throwable;") {
+    addOffset(OFFSETOF_MEMBER(mirror::Throwable, backtrace_), "backtrace");
     addOffset(OFFSETOF_MEMBER(mirror::Throwable, cause_), "cause");
     addOffset(OFFSETOF_MEMBER(mirror::Throwable, detail_message_), "detailMessage");
-    addOffset(OFFSETOF_MEMBER(mirror::Throwable, stack_state_), "stackState");
     addOffset(OFFSETOF_MEMBER(mirror::Throwable, stack_trace_), "stackTrace");
     addOffset(OFFSETOF_MEMBER(mirror::Throwable, suppressed_exceptions_), "suppressedExceptions");
   };
@@ -601,7 +613,7 @@
 struct AccessibleObjectOffsets : public CheckOffsets<mirror::AccessibleObject> {
   AccessibleObjectOffsets() : CheckOffsets<mirror::AccessibleObject>(
       false, "Ljava/lang/reflect/AccessibleObject;") {
-    addOffset(mirror::AccessibleObject::FlagOffset().Uint32Value(), "flag");
+    addOffset(mirror::AccessibleObject::FlagOffset().Uint32Value(), "override");
   };
 };
 
@@ -844,7 +856,7 @@
   // Validate that the "value" field is always the 0th field in each of java.lang's box classes.
   // This lets UnboxPrimitive avoid searching for the field by name at runtime.
   ScopedObjectAccess soa(Thread::Current());
-  NullHandle<mirror::ClassLoader> class_loader;
+  ScopedNullHandle<mirror::ClassLoader> class_loader;
   mirror::Class* c;
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Boolean;", class_loader);
   EXPECT_STREQ("value", c->GetIFieldsPtr()->At(0).GetName());
@@ -1090,7 +1102,7 @@
 
 TEST_F(ClassLinkerTest, ValidatePredefinedClassSizes) {
   ScopedObjectAccess soa(Thread::Current());
-  NullHandle<mirror::ClassLoader> class_loader;
+  ScopedNullHandle<mirror::ClassLoader> class_loader;
   mirror::Class* c;
 
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Class;", class_loader);
@@ -1122,10 +1134,7 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   EXPECT_EQ((c->GetAccessFlags() & kAccPreverified) != 0U, preverified)
       << "Class " << PrettyClass(c) << " not as expected";
-  for (auto& m : c->GetDirectMethods(sizeof(void*))) {
-    CheckMethod(&m, preverified);
-  }
-  for (auto& m : c->GetVirtualMethods(sizeof(void*))) {
+  for (auto& m : c->GetMethods(sizeof(void*))) {
     CheckMethod(&m, preverified);
   }
 }
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index 3ed1c95..df2dbf4 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -115,7 +115,7 @@
   return false;
 }
 
-std::size_t ClassTable::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& root)
+uint32_t ClassTable::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& root)
     const {
   std::string temp;
   return ComputeModifiedUtf8Hash(root.Read()->GetDescriptor(&temp));
@@ -133,7 +133,7 @@
   return a.Read()->DescriptorEquals(descriptor);
 }
 
-std::size_t ClassTable::ClassDescriptorHashEquals::operator()(const char* descriptor) const {
+uint32_t ClassTable::ClassDescriptorHashEquals::operator()(const char* descriptor) const {
   return ComputeModifiedUtf8Hash(descriptor);
 }
 
@@ -148,4 +148,29 @@
   return true;
 }
 
+size_t ClassTable::WriteToMemory(uint8_t* ptr) const {
+  ClassSet combined;
+  // Combine all the class sets in case there are multiple; this also adjusts the load factor
+  // back to the default in case classes were pruned.
+  for (const ClassSet& class_set : classes_) {
+    for (const GcRoot<mirror::Class>& root : class_set) {
+      combined.Insert(root);
+    }
+  }
+  const size_t ret = combined.WriteToMemory(ptr);
+  // Sanity check.
+  if (kIsDebugBuild && ptr != nullptr) {
+    size_t read_count;
+    ClassSet class_set(ptr, /*make copy*/false, &read_count);
+    class_set.Verify();
+  }
+  return ret;
+}
+
+size_t ClassTable::ReadFromMemory(uint8_t* ptr) {
+  size_t read_count = 0;
+  classes_.insert(classes_.begin(), ClassSet(ptr, /*make copy*/false, &read_count));
+  return read_count;
+}
+
 }  // namespace art
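
A sketch of the intended round trip, assuming (as the null-guarded sanity check above
suggests) that WriteToMemory(nullptr) only computes the required size; lock annotations
are omitted:

    size_t needed = table.WriteToMemory(nullptr);   // Size query only (assumed).
    std::unique_ptr<uint8_t[]> buffer(new uint8_t[needed]);
    table.WriteToMemory(buffer.get());              // Serialize the combined class set.
    ClassTable copy;
    copy.ReadFromMemory(buffer.get());              // Reads back without copying the data.
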
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 002bb56..c911365 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -104,17 +104,27 @@
       REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Combines all of the tables into one class set.
+  size_t WriteToMemory(uint8_t* ptr) const
+      REQUIRES(Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  size_t ReadFromMemory(uint8_t* ptr)
+      REQUIRES(Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   class ClassDescriptorHashEquals {
    public:
+    // uint32_t (not size_t) so hash values match across 32-/64-bit cross compilation.
+    uint32_t operator()(const GcRoot<mirror::Class>& root) const NO_THREAD_SAFETY_ANALYSIS;
     // Same class loader and descriptor.
-    std::size_t operator()(const GcRoot<mirror::Class>& root) const NO_THREAD_SAFETY_ANALYSIS;
     bool operator()(const GcRoot<mirror::Class>& a, const GcRoot<mirror::Class>& b) const
         NO_THREAD_SAFETY_ANALYSIS;;
     // Same descriptor.
     bool operator()(const GcRoot<mirror::Class>& a, const char* descriptor) const
         NO_THREAD_SAFETY_ANALYSIS;
-    std::size_t operator()(const char* descriptor) const NO_THREAD_SAFETY_ANALYSIS;
+    // uint32_t (not size_t) so hash values match across 32-/64-bit cross compilation.
+    uint32_t operator()(const char* descriptor) const NO_THREAD_SAFETY_ANALYSIS;
   };
   class GcRootEmptyFn {
    public:
diff --git a/runtime/code_simulator_container.cc b/runtime/code_simulator_container.cc
new file mode 100644
index 0000000..d884c58
--- /dev/null
+++ b/runtime/code_simulator_container.cc
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <dlfcn.h>
+
+#include "code_simulator_container.h"
+#include "globals.h"
+
+namespace art {
+
+CodeSimulatorContainer::CodeSimulatorContainer(InstructionSet target_isa)
+    : libart_simulator_handle_(nullptr),
+      simulator_(nullptr) {
+  const char* libart_simulator_so_name =
+      kIsDebugBuild ? "libartd-simulator.so" : "libart-simulator.so";
+  libart_simulator_handle_ = dlopen(libart_simulator_so_name, RTLD_NOW);
+  // It is not a real error when libart-simulator does not exist, e.g., on target.
+  if (libart_simulator_handle_ == nullptr) {
+    VLOG(simulator) << "Could not load " << libart_simulator_so_name << ": " << dlerror();
+  } else {
+    typedef CodeSimulator* (*create_code_simulator_ptr_)(InstructionSet target_isa);
+    create_code_simulator_ptr_ create_code_simulator_ =
+        reinterpret_cast<create_code_simulator_ptr_>(
+            dlsym(libart_simulator_handle_, "CreateCodeSimulator"));
+    DCHECK(create_code_simulator_ != nullptr) << "Failed to find symbol CreateCodeSimulator: "
+        << dlerror();
+    simulator_ = create_code_simulator_(target_isa);
+  }
+}
+
+CodeSimulatorContainer::~CodeSimulatorContainer() {
+  // Free the simulator object before closing libart-simulator, because the destructor of
+  // CodeSimulator lives in it.
+  if (simulator_ != nullptr) {
+    delete simulator_;
+  }
+  if (libart_simulator_handle_ != nullptr) {
+    dlclose(libart_simulator_handle_);
+  }
+}
+
+}  // namespace art
diff --git a/runtime/code_simulator_container.h b/runtime/code_simulator_container.h
new file mode 100644
index 0000000..655a247
--- /dev/null
+++ b/runtime/code_simulator_container.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_CODE_SIMULATOR_CONTAINER_H_
+#define ART_RUNTIME_CODE_SIMULATOR_CONTAINER_H_
+
+#include "arch/instruction_set.h"
+#include "simulator/code_simulator.h"
+
+namespace art {
+
+// This container dynamically opens and closes libart-simulator.
+class CodeSimulatorContainer {
+ public:
+  explicit CodeSimulatorContainer(InstructionSet target_isa);
+  ~CodeSimulatorContainer();
+
+  bool CanSimulate() const {
+    return simulator_ != nullptr;
+  }
+
+  CodeSimulator* Get() {
+    DCHECK(CanSimulate());
+    return simulator_;
+  }
+
+  const CodeSimulator* Get() const {
+    DCHECK(CanSimulate());
+    return simulator_;
+  }
+
+ private:
+  void* libart_simulator_handle_;
+  CodeSimulator* simulator_;
+
+  DISALLOW_COPY_AND_ASSIGN(CodeSimulatorContainer);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_CODE_SIMULATOR_CONTAINER_H_
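
A usage sketch, assuming CodeSimulator exposes an entry point such as RunFrom (its
interface is not part of this diff):

    CodeSimulatorContainer simulator(kArm64);
    if (simulator.CanSimulate()) {
      simulator.Get()->RunFrom(code_entry);  // Hypothetical CodeSimulator call.
    } else {
      // libart-simulator was not found (e.g., on target); run the code natively instead.
    }
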
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index b6b5141..403dd4c 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -303,7 +303,12 @@
 
 
   RuntimeOptions options;
-  std::string boot_class_path_string = "-Xbootclasspath:" + GetLibCoreDexFileName();
+  std::string boot_class_path_string = "-Xbootclasspath";
+  for (const std::string& core_dex_file_name : GetLibCoreDexFileNames()) {
+    boot_class_path_string += ":";
+    boot_class_path_string += core_dex_file_name;
+  }
+
   options.push_back(std::make_pair(boot_class_path_string, nullptr));
   options.push_back(std::make_pair("-Xcheck:jni", nullptr));
   options.push_back(std::make_pair(min_heap_string, nullptr));
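
For illustration (paths hypothetical), the loop above produces a single colon-joined
option of the form:

    -Xbootclasspath:<dir>/core-oj-hostdex.jar:<dir>/core-libart-hostdex.jar
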
@@ -328,6 +333,19 @@
   class_linker_ = runtime_->GetClassLinker();
   class_linker_->FixupDexCaches(runtime_->GetResolutionMethod());
 
+  // Runtime::Create acquired the mutator_lock_ that is normally given away when we call
+  // Runtime::Start; give it away now and then switch to a more manageable ScopedObjectAccess.
+  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+
+  // Get the boot class path from the runtime so it can be used in tests.
+  boot_class_path_ = class_linker_->GetBootClassPath();
+  ASSERT_FALSE(boot_class_path_.empty());
+  java_lang_dex_file_ = boot_class_path_[0];
+
+  FinalizeSetup();
+}
+
+void CommonRuntimeTest::FinalizeSetup() {
   // Initialize maps for unstarted runtime. This needs to be here, as running clinits needs this
   // set up.
   if (!unstarted_initialized_) {
@@ -335,14 +353,10 @@
     unstarted_initialized_ = true;
   }
 
-  class_linker_->RunRootClinits();
-  boot_class_path_ = class_linker_->GetBootClassPath();
-  java_lang_dex_file_ = boot_class_path_[0];
-
-
-  // Runtime::Create acquired the mutator_lock_ that is normally given away when we
-  // Runtime::Start, give it away now and then switch to a more managable ScopedObjectAccess.
-  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    class_linker_->RunRootClinits();
+  }
 
   // We're back in native, take the opportunity to initialize well known classes.
   WellKnownClasses::Init(Thread::Current()->GetJniEnv());
@@ -353,11 +367,6 @@
   runtime_->GetHeap()->VerifyHeap();  // Check for heap corruption before the test
   // Reduce timing-dependent flakiness in OOME behavior (e.g. StubTest.AllocObject).
   runtime_->GetHeap()->SetMinIntervalHomogeneousSpaceCompactionByOom(0U);
-
-  // Get the boot class path from the runtime so it can be used in tests.
-  boot_class_path_ = class_linker_->GetBootClassPath();
-  ASSERT_FALSE(boot_class_path_.empty());
-  java_lang_dex_file_ = boot_class_path_[0];
 }
 
 void CommonRuntimeTest::ClearDirectory(const char* dirpath) {
@@ -405,10 +414,30 @@
   (*icu_cleanup_fn)();
 
   Runtime::Current()->GetHeap()->VerifyHeap();  // Check for heap corruption after the test
+
+  // Manually close the JNI libraries.
+  // Runtime does not support repeatedly doing JNI->CreateVM, thus we need to manually clean up
+  // the dynamic linking loader so that gtests do not fail.
+  // Bug: 25785594
+  if (runtime_->IsStarted()) {
+    {
+      // We retrieve the handle by calling dlopen on the library. To close it, we need to call
+      // dlclose twice, the first time to undo our dlopen and the second time to actually unload it.
+      // See man dlopen.
+      void* handle = dlopen("libjavacore.so", RTLD_LAZY);
+      dlclose(handle);
+      CHECK_EQ(0, dlclose(handle));
+    }
+    {
+      void* handle = dlopen("libopenjdk.so", RTLD_LAZY);
+      dlclose(handle);
+      CHECK_EQ(0, dlclose(handle));
+    }
+  }
 }
 
-std::string CommonRuntimeTest::GetLibCoreDexFileName() {
-  return GetDexFileName("core-libart");
+std::vector<std::string> CommonRuntimeTest::GetLibCoreDexFileNames() {
+  return std::vector<std::string>({GetDexFileName("core-oj"), GetDexFileName("core-libart")});
 }
 
 std::string CommonRuntimeTest::GetDexFileName(const std::string& jar_prefix) {
@@ -553,7 +582,8 @@
 
   Thread* self = Thread::Current();
   jobject class_loader = Runtime::Current()->GetClassLinker()->CreatePathClassLoader(self,
-                                                                                     class_path);
+                                                                                     class_path,
+                                                                                     nullptr);
   self->SetClassLoaderOverride(class_loader);
   return class_loader;
 }
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 6da2bef..8d9e628 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -77,8 +77,8 @@
   CommonRuntimeTest();
   ~CommonRuntimeTest();
 
-  // Gets the path of the libcore dex file.
-  static std::string GetLibCoreDexFileName();
+  // Gets the paths of the libcore dex files.
+  static std::vector<std::string> GetLibCoreDexFileNames();
 
   // Returns bin directory which contains host's prebuild tools.
   static std::string GetAndroidHostToolsDir();
@@ -114,6 +114,10 @@
   // Called after the runtime is created.
   virtual void PostRuntimeCreate() {}
 
+  // Called to finish up runtime creation and fill in test fields. By default this runs root
+  // initializers, initializes well-known classes, and creates the heap thread pool.
+  virtual void FinalizeSetup();
+
   // Gets the path of the specified dex file for host or target.
   static std::string GetDexFileName(const std::string& jar_prefix);
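
A sketch of the intended extension point (fixture name hypothetical); a subclass can
augment setup by overriding FinalizeSetup and delegating to the default:

    class MyRuntimeTest : public CommonRuntimeTest {
     protected:
      void FinalizeSetup() OVERRIDE {
        // Extra pre-start work would go here.
        CommonRuntimeTest::FinalizeSetup();
      }
    };
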
 
diff --git a/runtime/compiler_callbacks.h b/runtime/compiler_callbacks.h
index af7b04f..a39d682 100644
--- a/runtime/compiler_callbacks.h
+++ b/runtime/compiler_callbacks.h
@@ -37,8 +37,8 @@
 
   virtual ~CompilerCallbacks() { }
 
-  virtual bool MethodVerified(verifier::MethodVerifier* verifier)
-  SHARED_REQUIRES(Locks::mutator_lock_) = 0;
+  virtual void MethodVerified(verifier::MethodVerifier* verifier)
+      SHARED_REQUIRES(Locks::mutator_lock_) = 0;
   virtual void ClassRejected(ClassReference ref) = 0;
 
   // Return true if we should attempt to relocate to a random base address if we have not already
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 32e77b7..e0211f5 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -69,10 +69,25 @@
   return alloc_record_count;
 }
 
+// Takes a method and returns a 'canonical' one if the method is default (and therefore potentially
+// copied from some other class). This ensures that the debugger does not get confused as to which
+// method we are in.
+static ArtMethod* GetCanonicalMethod(ArtMethod* m)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (LIKELY(!m->IsDefault())) {
+    return m;
+  } else {
+    mirror::Class* declaring_class = m->GetDeclaringClass();
+    return declaring_class->FindDeclaredVirtualMethod(declaring_class->GetDexCache(),
+                                                      m->GetDexMethodIndex(),
+                                                      sizeof(void*));
+  }
+}
+
 class Breakpoint : public ValueObject {
  public:
   Breakpoint(ArtMethod* method, uint32_t dex_pc, DeoptimizationRequest::Kind deoptimization_kind)
-    : method_(method),
+    : method_(GetCanonicalMethod(method)),
       dex_pc_(dex_pc),
       deoptimization_kind_(deoptimization_kind) {
     CHECK(deoptimization_kind_ == DeoptimizationRequest::kNothing ||
@@ -99,6 +114,12 @@
     return deoptimization_kind_;
   }
 
+  // Returns true if the method of this breakpoint and the passed in method should be considered the
+  // same. That is, they are either the same method or they are copied from the same method.
+  bool IsInMethod(ArtMethod* m) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return method_ == GetCanonicalMethod(m);
+  }
+
  private:
   // The location of this breakpoint.
   ArtMethod* method_;
@@ -306,12 +327,12 @@
   return dex_pcs_.find(dex_pc) == dex_pcs_.end();
 }
 
-static bool IsBreakpoint(const ArtMethod* m, uint32_t dex_pc)
+static bool IsBreakpoint(ArtMethod* m, uint32_t dex_pc)
     REQUIRES(!Locks::breakpoint_lock_)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ReaderMutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
   for (size_t i = 0, e = gBreakpoints.size(); i < e; ++i) {
-    if (gBreakpoints[i].DexPc() == dex_pc && gBreakpoints[i].Method() == m) {
+    if (gBreakpoints[i].DexPc() == dex_pc && gBreakpoints[i].IsInMethod(m)) {
       VLOG(jdwp) << "Hit breakpoint #" << i << ": " << gBreakpoints[i];
       return true;
     }
@@ -1282,9 +1303,9 @@
   return static_cast<JDWP::FieldId>(reinterpret_cast<uintptr_t>(f));
 }
 
-static JDWP::MethodId ToMethodId(const ArtMethod* m)
+static JDWP::MethodId ToMethodId(ArtMethod* m)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  return static_cast<JDWP::MethodId>(reinterpret_cast<uintptr_t>(m));
+  return static_cast<JDWP::MethodId>(reinterpret_cast<uintptr_t>(GetCanonicalMethod(m)));
 }
 
 static ArtField* FromFieldId(JDWP::FieldId fid)
@@ -1470,25 +1491,20 @@
     return error;
   }
 
-  size_t direct_method_count = c->NumDirectMethods();
-  size_t virtual_method_count = c->NumVirtualMethods();
-
-  expandBufAdd4BE(pReply, direct_method_count + virtual_method_count);
+  expandBufAdd4BE(pReply, c->NumMethods());
 
   auto* cl = Runtime::Current()->GetClassLinker();
   auto ptr_size = cl->GetImagePointerSize();
-  for (size_t i = 0; i < direct_method_count + virtual_method_count; ++i) {
-    ArtMethod* m = i < direct_method_count ?
-        c->GetDirectMethod(i, ptr_size) : c->GetVirtualMethod(i - direct_method_count, ptr_size);
-    expandBufAddMethodId(pReply, ToMethodId(m));
-    expandBufAddUtf8String(pReply, m->GetInterfaceMethodIfProxy(sizeof(void*))->GetName());
+  for (ArtMethod& m : c->GetMethods(ptr_size)) {
+    expandBufAddMethodId(pReply, ToMethodId(&m));
+    expandBufAddUtf8String(pReply, m.GetInterfaceMethodIfProxy(sizeof(void*))->GetName());
     expandBufAddUtf8String(pReply,
-                           m->GetInterfaceMethodIfProxy(sizeof(void*))->GetSignature().ToString());
+                           m.GetInterfaceMethodIfProxy(sizeof(void*))->GetSignature().ToString());
     if (with_generic) {
       const char* generic_signature = "";
       expandBufAddUtf8String(pReply, generic_signature);
     }
-    expandBufAdd4BE(pReply, MangleAccessFlags(m->GetAccessFlags()));
+    expandBufAdd4BE(pReply, MangleAccessFlags(m.GetAccessFlags()));
   }
   return JDWP::ERR_NONE;
 }
@@ -1515,10 +1531,10 @@
     int numItems;
     JDWP::ExpandBuf* pReply;
 
-    static bool Callback(void* context, uint32_t address, uint32_t line_number) {
+    static bool Callback(void* context, const DexFile::PositionInfo& entry) {
       DebugCallbackContext* pContext = reinterpret_cast<DebugCallbackContext*>(context);
-      expandBufAdd8BE(pContext->pReply, address);
-      expandBufAdd4BE(pContext->pReply, line_number);
+      expandBufAdd8BE(pContext->pReply, entry.address_);
+      expandBufAdd4BE(pContext->pReply, entry.line_);
       pContext->numItems++;
       return false;
     }
@@ -1548,8 +1564,7 @@
   context.pReply = pReply;
 
   if (code_item != nullptr) {
-    m->GetDexFile()->DecodeDebugInfo(code_item, m->IsStatic(), m->GetDexMethodIndex(),
-                                     DebugCallbackContext::Callback, nullptr, &context);
+    m->GetDexFile()->DecodeDebugPositionInfo(code_item, DebugCallbackContext::Callback, &context);
   }
 
   JDWP::Set4BE(expandBufGetBuffer(pReply) + numLinesOffset, context.numItems);
@@ -1563,25 +1578,26 @@
     size_t variable_count;
     bool with_generic;
 
-    static void Callback(void* context, uint16_t slot, uint32_t startAddress, uint32_t endAddress,
-                         const char* name, const char* descriptor, const char* signature)
+    static void Callback(void* context, const DexFile::LocalInfo& entry)
         SHARED_REQUIRES(Locks::mutator_lock_) {
       DebugCallbackContext* pContext = reinterpret_cast<DebugCallbackContext*>(context);
 
+      uint16_t slot = entry.reg_;
       VLOG(jdwp) << StringPrintf("    %2zd: %d(%d) '%s' '%s' '%s' actual slot=%d mangled slot=%d",
-                                 pContext->variable_count, startAddress, endAddress - startAddress,
-                                 name, descriptor, signature, slot,
+                                 pContext->variable_count, entry.start_address_,
+                                 entry.end_address_ - entry.start_address_,
+                                 entry.name_, entry.descriptor_, entry.signature_, slot,
                                  MangleSlot(slot, pContext->method));
 
       slot = MangleSlot(slot, pContext->method);
 
-      expandBufAdd8BE(pContext->pReply, startAddress);
-      expandBufAddUtf8String(pContext->pReply, name);
-      expandBufAddUtf8String(pContext->pReply, descriptor);
+      expandBufAdd8BE(pContext->pReply, entry.start_address_);
+      expandBufAddUtf8String(pContext->pReply, entry.name_);
+      expandBufAddUtf8String(pContext->pReply, entry.descriptor_);
       if (pContext->with_generic) {
-        expandBufAddUtf8String(pContext->pReply, signature);
+        expandBufAddUtf8String(pContext->pReply, entry.signature_);
       }
-      expandBufAdd4BE(pContext->pReply, endAddress - startAddress);
+      expandBufAdd4BE(pContext->pReply, entry.end_address_ - entry.start_address_);
       expandBufAdd4BE(pContext->pReply, slot);
 
       ++pContext->variable_count;
@@ -1606,8 +1622,8 @@
 
   const DexFile::CodeItem* code_item = m->GetCodeItem();
   if (code_item != nullptr) {
-    m->GetDexFile()->DecodeDebugInfo(
-        code_item, m->IsStatic(), m->GetDexMethodIndex(), nullptr, DebugCallbackContext::Callback,
+    m->GetDexFile()->DecodeDebugLocalInfo(
+        code_item, m->IsStatic(), m->GetDexMethodIndex(), DebugCallbackContext::Callback,
         &context);
   }
 
@@ -2017,29 +2033,28 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   CHECK(thread_group != nullptr);
 
-  // Get the ArrayList<ThreadGroup> "groups" out of this thread group...
-  ArtField* groups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_groups);
-  mirror::Object* groups_array_list = groups_field->GetObject(thread_group);
-  {
-    // The "groups" field is declared as a java.util.List: check it really is
-    // an instance of java.util.ArrayList.
-    CHECK(groups_array_list != nullptr);
-    mirror::Class* java_util_ArrayList_class =
-        soa.Decode<mirror::Class*>(WellKnownClasses::java_util_ArrayList);
-    CHECK(groups_array_list->InstanceOf(java_util_ArrayList_class));
+  // Get the int "ngroups" count of this thread group...
+  ArtField* ngroups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_ngroups);
+  CHECK(ngroups_field != nullptr);
+  const int32_t size = ngroups_field->GetInt(thread_group);
+  if (size == 0) {
+    return;
   }
 
-  // Get the array and size out of the ArrayList<ThreadGroup>...
-  ArtField* array_field = soa.DecodeField(WellKnownClasses::java_util_ArrayList_array);
-  ArtField* size_field = soa.DecodeField(WellKnownClasses::java_util_ArrayList_size);
-  mirror::ObjectArray<mirror::Object>* groups_array =
-      array_field->GetObject(groups_array_list)->AsObjectArray<mirror::Object>();
-  const int32_t size = size_field->GetInt(groups_array_list);
+  // Get the ThreadGroup[] "groups" out of this thread group...
+  ArtField* groups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_groups);
+  mirror::Object* groups_array = groups_field->GetObject(thread_group);
+
+  CHECK(groups_array != nullptr);
+  CHECK(groups_array->IsObjectArray());
+
+  mirror::ObjectArray<mirror::Object>* groups_array_as_array =
+      groups_array->AsObjectArray<mirror::Object>();
 
   // Copy the first 'size' elements out of the array into the result.
   ObjectRegistry* registry = Dbg::GetObjectRegistry();
   for (int32_t i = 0; i < size; ++i) {
-    child_thread_group_ids->push_back(registry->Add(groups_array->Get(i)));
+    child_thread_group_ids->push_back(registry->Add(groups_array_as_array->Get(i)));
   }
 }
 
@@ -2763,7 +2778,7 @@
   if (m == nullptr) {
     memset(location, 0, sizeof(*location));
   } else {
-    location->method = m;
+    location->method = GetCanonicalMethod(m);
     location->dex_pc = (m->IsNative() || m->IsProxyMethod()) ? static_cast<uint32_t>(-1) : dex_pc;
   }
 }
@@ -3214,7 +3229,7 @@
 static const Breakpoint* FindFirstBreakpointForMethod(ArtMethod* m)
     SHARED_REQUIRES(Locks::mutator_lock_, Locks::breakpoint_lock_) {
   for (Breakpoint& breakpoint : gBreakpoints) {
-    if (breakpoint.Method() == m) {
+    if (breakpoint.IsInMethod(m)) {
       return &breakpoint;
     }
   }
@@ -3231,7 +3246,7 @@
                                            DeoptimizationRequest::Kind deoptimization_kind)
     SHARED_REQUIRES(Locks::mutator_lock_, Locks::breakpoint_lock_) {
   for (const Breakpoint& breakpoint : gBreakpoints) {
-    if (breakpoint.Method() == m) {
+    if (breakpoint.IsInMethod(m)) {
       CHECK_EQ(deoptimization_kind, breakpoint.GetDeoptimizationKind());
     }
   }
@@ -3274,12 +3289,15 @@
 
   if (first_breakpoint == nullptr) {
     // There is no breakpoint on this method yet: we need to deoptimize. If this method may be
-    // inlined, we deoptimize everything; otherwise we deoptimize only this method.
+    // inlined or default, we deoptimize everything; otherwise we deoptimize only this method. We
+    // fully deoptimize for default methods because we do not know everywhere their copies are
+    // used; it is possible some of the copies could be inlined or otherwise missed.
+    // TODO Deoptimizing on default methods might not be necessary in all cases.
     // Note: IsMethodPossiblyInlined goes into the method verifier and may cause thread suspension.
     // Therefore we must not hold any lock when we call it.
-    bool need_full_deoptimization = IsMethodPossiblyInlined(self, m);
+    bool need_full_deoptimization = m->IsDefault() || IsMethodPossiblyInlined(self, m);
     if (need_full_deoptimization) {
-      VLOG(jdwp) << "Need full deoptimization because of possible inlining of method "
+      VLOG(jdwp) << "Need full deoptimization because of possible inlining or copying of method "
                  << PrettyMethod(m);
       return DeoptimizationRequest::kFullDeoptimization;
     } else {
@@ -3359,7 +3377,7 @@
   DCHECK(m != nullptr) << "No method for method id " << location->method_id;
   DeoptimizationRequest::Kind deoptimization_kind = DeoptimizationRequest::kNothing;
   for (size_t i = 0, e = gBreakpoints.size(); i < e; ++i) {
-    if (gBreakpoints[i].DexPc() == location->dex_pc && gBreakpoints[i].Method() == m) {
+    if (gBreakpoints[i].DexPc() == location->dex_pc && gBreakpoints[i].IsInMethod(m)) {
       VLOG(jdwp) << "Removed breakpoint #" << i << ": " << gBreakpoints[i];
       deoptimization_kind = gBreakpoints[i].GetDeoptimizationKind();
       DCHECK_EQ(deoptimization_kind == DeoptimizationRequest::kSelectiveDeoptimization,
@@ -3692,19 +3710,19 @@
           code_item_(code_item), last_pc_valid(false), last_pc(0) {
     }
 
-    static bool Callback(void* raw_context, uint32_t address, uint32_t line_number_cb) {
+    static bool Callback(void* raw_context, const DexFile::PositionInfo& entry) {
       DebugCallbackContext* context = reinterpret_cast<DebugCallbackContext*>(raw_context);
-      if (static_cast<int32_t>(line_number_cb) == context->line_number_) {
+      if (static_cast<int32_t>(entry.line_) == context->line_number_) {
         if (!context->last_pc_valid) {
           // Everything from this address until the next line change is ours.
-          context->last_pc = address;
+          context->last_pc = entry.address_;
           context->last_pc_valid = true;
         }
         // Otherwise, if we're already in a valid range for this line,
         // just keep going (shouldn't really happen)...
       } else if (context->last_pc_valid) {  // and the line number is new
         // Add everything from the last entry up until here to the set
-        for (uint32_t dex_pc = context->last_pc; dex_pc < address; ++dex_pc) {
+        for (uint32_t dex_pc = context->last_pc; dex_pc < entry.address_; ++dex_pc) {
           context->single_step_control_->AddDexPc(dex_pc);
         }
         context->last_pc_valid = false;
@@ -3745,8 +3763,7 @@
   if (m != nullptr && !m->IsNative()) {
     const DexFile::CodeItem* const code_item = m->GetCodeItem();
     DebugCallbackContext context(single_step_control, line_number, code_item);
-    m->GetDexFile()->DecodeDebugInfo(code_item, m->IsStatic(), m->GetDexMethodIndex(),
-                                     DebugCallbackContext::Callback, nullptr, &context);
+    m->GetDexFile()->DecodeDebugPositionInfo(code_item, DebugCallbackContext::Callback, &context);
   }
 
   // Activate single-step in the thread.
@@ -4732,12 +4749,7 @@
   // Send a series of heap segment chunks.
   HeapChunkContext context(what == HPSG_WHAT_MERGED_OBJECTS, native);
   if (native) {
-#if defined(__ANDROID__) && defined(USE_DLMALLOC)
-    dlmalloc_inspect_all(HeapChunkContext::HeapChunkNativeCallback, &context);
-    HeapChunkContext::HeapChunkNativeCallback(nullptr, nullptr, 0, &context);  // Indicate end of a space.
-#else
-    UNIMPLEMENTED(WARNING) << "Native heap inspection is only supported with dlmalloc";
-#endif
+    UNIMPLEMENTED(WARNING) << "Native heap inspection is not supported";
   } else {
     gc::Heap* heap = Runtime::Current()->GetHeap();
     for (const auto& space : heap->GetContinuousSpaces()) {
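
A hedged sketch of the situation the canonicalization above addresses (type names
hypothetical):

    // interface I { default void foo() { ... } }  // foo is copied into implementors.
    // class A implements I {}                     // A receives its own ArtMethod copy of foo.
    // class B implements I {}                     // B receives another copy.
    // GetCanonicalMethod maps both copies back to I.foo, so a breakpoint set through either
    // copy matches frames executing any of them via Breakpoint::IsInMethod.
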
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 70096f5..bc8ba97 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -29,6 +29,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/file_magic.h"
 #include "base/hash_map.h"
 #include "base/logging.h"
 #include "base/stl_util.h"
@@ -62,26 +63,6 @@
 const uint8_t DexFile::kDexMagic[] = { 'd', 'e', 'x', '\n' };
 const uint8_t DexFile::kDexMagicVersion[] = { '0', '3', '5', '\0' };
 
-static int OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg) {
-  CHECK(magic != nullptr);
-  ScopedFd fd(open(filename, O_RDONLY, 0));
-  if (fd.get() == -1) {
-    *error_msg = StringPrintf("Unable to open '%s' : %s", filename, strerror(errno));
-    return -1;
-  }
-  int n = TEMP_FAILURE_RETRY(read(fd.get(), magic, sizeof(*magic)));
-  if (n != sizeof(*magic)) {
-    *error_msg = StringPrintf("Failed to find magic in '%s'", filename);
-    return -1;
-  }
-  if (lseek(fd.get(), 0, SEEK_SET) != 0) {
-    *error_msg = StringPrintf("Failed to seek to beginning of file '%s' : %s", filename,
-                              strerror(errno));
-    return -1;
-  }
-  return fd.release();
-}
-
 bool DexFile::GetChecksum(const char* filename, uint32_t* checksum, std::string* error_msg) {
   CHECK(checksum != nullptr);
   uint32_t magic;
@@ -786,8 +767,7 @@
 
   // A method with no line number info should return -1
   LineNumFromPcContext context(rel_pc, -1);
-  DecodeDebugInfo(code_item, method->IsStatic(), method->GetDexMethodIndex(), LineNumForPcCb,
-                  nullptr, &context);
+  DecodeDebugPositionInfo(code_item, LineNumForPcCb, &context);
   return context.line_num_;
 }
 
@@ -824,45 +804,48 @@
   }
 }
 
-void DexFile::DecodeDebugInfo0(const CodeItem* code_item, bool is_static, uint32_t method_idx,
-                               DexDebugNewPositionCb position_cb, DexDebugNewLocalCb local_cb,
-                               void* context, const uint8_t* stream, LocalInfo* local_in_reg)
-    const {
-  uint32_t line = DecodeUnsignedLeb128(&stream);
-  uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
-  uint16_t arg_reg = code_item->registers_size_ - code_item->ins_size_;
-  uint32_t address = 0;
-  bool need_locals = (local_cb != nullptr);
+bool DexFile::DecodeDebugLocalInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
+                                   DexDebugNewLocalCb local_cb, void* context) const {
+  DCHECK(local_cb != nullptr);
+  if (code_item == nullptr) {
+    return false;
+  }
+  const uint8_t* stream = GetDebugInfoStream(code_item);
+  if (stream == nullptr) {
+    return false;
+  }
+  std::vector<LocalInfo> local_in_reg(code_item->registers_size_);
 
+  uint16_t arg_reg = code_item->registers_size_ - code_item->ins_size_;
   if (!is_static) {
-    if (need_locals) {
-      const char* descriptor = GetMethodDeclaringClassDescriptor(GetMethodId(method_idx));
-      local_in_reg[arg_reg].name_ = "this";
-      local_in_reg[arg_reg].descriptor_ = descriptor;
-      local_in_reg[arg_reg].signature_ = nullptr;
-      local_in_reg[arg_reg].start_address_ = 0;
-      local_in_reg[arg_reg].is_live_ = true;
-    }
+    const char* descriptor = GetMethodDeclaringClassDescriptor(GetMethodId(method_idx));
+    local_in_reg[arg_reg].name_ = "this";
+    local_in_reg[arg_reg].descriptor_ = descriptor;
+    local_in_reg[arg_reg].signature_ = nullptr;
+    local_in_reg[arg_reg].start_address_ = 0;
+    local_in_reg[arg_reg].reg_ = arg_reg;
+    local_in_reg[arg_reg].is_live_ = true;
     arg_reg++;
   }
 
   DexFileParameterIterator it(*this, GetMethodPrototype(GetMethodId(method_idx)));
-  for (uint32_t i = 0; i < parameters_size && it.HasNext(); ++i, it.Next()) {
+  DecodeUnsignedLeb128(&stream);  // Line.
+  uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
+  uint32_t i;
+  for (i = 0; i < parameters_size && it.HasNext(); ++i, it.Next()) {
     if (arg_reg >= code_item->registers_size_) {
       LOG(ERROR) << "invalid stream - arg reg >= reg size (" << arg_reg
                  << " >= " << code_item->registers_size_ << ") in " << GetLocation();
-      return;
+      return false;
     }
-    uint32_t id = DecodeUnsignedLeb128P1(&stream);
+    uint32_t name_idx = DecodeUnsignedLeb128P1(&stream);
     const char* descriptor = it.GetDescriptor();
-    if (need_locals && id != kDexNoIndex) {
-      const char* name = StringDataByIdx(id);
-      local_in_reg[arg_reg].name_ = name;
-      local_in_reg[arg_reg].descriptor_ = descriptor;
-      local_in_reg[arg_reg].signature_ = nullptr;
-      local_in_reg[arg_reg].start_address_ = address;
-      local_in_reg[arg_reg].is_live_ = true;
-    }
+    local_in_reg[arg_reg].name_ = StringDataByIdx(name_idx);
+    local_in_reg[arg_reg].descriptor_ = descriptor;
+    local_in_reg[arg_reg].signature_ = nullptr;
+    local_in_reg[arg_reg].start_address_ = 0;
+    local_in_reg[arg_reg].reg_ = arg_reg;
+    local_in_reg[arg_reg].is_live_ = true;
     switch (*descriptor) {
       case 'D':
       case 'J':
@@ -873,152 +856,188 @@
         break;
     }
   }
-
-  if (it.HasNext()) {
+  if (i != parameters_size || it.HasNext()) {
     LOG(ERROR) << "invalid stream - problem with parameter iterator in " << GetLocation()
                << " for method " << PrettyMethod(method_idx, *this);
-    return;
+    return false;
   }
 
+  uint32_t address = 0;
   for (;;)  {
     uint8_t opcode = *stream++;
-    uint16_t reg;
-    uint32_t name_idx;
-    uint32_t descriptor_idx;
-    uint32_t signature_idx = 0;
-
     switch (opcode) {
       case DBG_END_SEQUENCE:
-        return;
-
+        // Emit all variables which are still alive at the end of the method.
+        for (uint16_t reg = 0; reg < code_item->registers_size_; reg++) {
+          if (local_in_reg[reg].is_live_) {
+            local_in_reg[reg].end_address_ = code_item->insns_size_in_code_units_;
+            local_cb(context, local_in_reg[reg]);
+          }
+        }
+        return true;
       case DBG_ADVANCE_PC:
         address += DecodeUnsignedLeb128(&stream);
         break;
-
       case DBG_ADVANCE_LINE:
-        line += DecodeSignedLeb128(&stream);
+        DecodeSignedLeb128(&stream);  // Line.
         break;
-
       case DBG_START_LOCAL:
-      case DBG_START_LOCAL_EXTENDED:
-        reg = DecodeUnsignedLeb128(&stream);
-        if (reg > code_item->registers_size_) {
-          LOG(ERROR) << "invalid stream - reg > reg size (" << reg << " > "
+      case DBG_START_LOCAL_EXTENDED: {
+        uint16_t reg = DecodeUnsignedLeb128(&stream);
+        if (reg >= code_item->registers_size_) {
+          LOG(ERROR) << "invalid stream - reg >= reg size (" << reg << " >= "
                      << code_item->registers_size_ << ") in " << GetLocation();
-          return;
+          return false;
         }
 
-        name_idx = DecodeUnsignedLeb128P1(&stream);
-        descriptor_idx = DecodeUnsignedLeb128P1(&stream);
+        uint32_t name_idx = DecodeUnsignedLeb128P1(&stream);
+        uint32_t descriptor_idx = DecodeUnsignedLeb128P1(&stream);
+        uint32_t signature_idx = kDexNoIndex;
         if (opcode == DBG_START_LOCAL_EXTENDED) {
           signature_idx = DecodeUnsignedLeb128P1(&stream);
         }
 
         // Emit what was previously there, if anything
-        if (need_locals) {
-          InvokeLocalCbIfLive(context, reg, address, local_in_reg, local_cb);
+        if (local_in_reg[reg].is_live_) {
+          local_in_reg[reg].end_address_ = address;
+          local_cb(context, local_in_reg[reg]);
+        }
 
-          local_in_reg[reg].name_ = StringDataByIdx(name_idx);
-          local_in_reg[reg].descriptor_ = StringByTypeIdx(descriptor_idx);
-          local_in_reg[reg].signature_ =
-              (opcode == DBG_START_LOCAL_EXTENDED) ? StringDataByIdx(signature_idx)
-                                                   : nullptr;
+        local_in_reg[reg].name_ = StringDataByIdx(name_idx);
+        local_in_reg[reg].descriptor_ = StringByTypeIdx(descriptor_idx);
+        local_in_reg[reg].signature_ = StringDataByIdx(signature_idx);
+        local_in_reg[reg].start_address_ = address;
+        local_in_reg[reg].reg_ = reg;
+        local_in_reg[reg].is_live_ = true;
+        break;
+      }
+      case DBG_END_LOCAL: {
+        uint16_t reg = DecodeUnsignedLeb128(&stream);
+        if (reg >= code_item->registers_size_) {
+          LOG(ERROR) << "invalid stream - reg >= reg size (" << reg << " >= "
+                     << code_item->registers_size_ << ") in " << GetLocation();
+          return false;
+        }
+        if (!local_in_reg[reg].is_live_) {
+          LOG(ERROR) << "invalid stream - end without start in " << GetLocation();
+          return false;
+        }
+        local_in_reg[reg].end_address_ = address;
+        local_cb(context, local_in_reg[reg]);
+        local_in_reg[reg].is_live_ = false;
+        break;
+      }
+      case DBG_RESTART_LOCAL: {
+        uint16_t reg = DecodeUnsignedLeb128(&stream);
+        if (reg >= code_item->registers_size_) {
+          LOG(ERROR) << "invalid stream - reg >= reg size (" << reg << " >= "
+                     << code_item->registers_size_ << ") in " << GetLocation();
+          return false;
+        }
+        // If the register is live, the "restart" is superfluous,
+        // and we don't want to mess with the existing start address.
+        if (!local_in_reg[reg].is_live_) {
           local_in_reg[reg].start_address_ = address;
           local_in_reg[reg].is_live_ = true;
         }
         break;
-
-      case DBG_END_LOCAL:
-        reg = DecodeUnsignedLeb128(&stream);
-        if (reg > code_item->registers_size_) {
-          LOG(ERROR) << "invalid stream - reg > reg size (" << reg << " > "
-                     << code_item->registers_size_ << ") in " << GetLocation();
-          return;
-        }
-
-        if (need_locals) {
-          InvokeLocalCbIfLive(context, reg, address, local_in_reg, local_cb);
-          local_in_reg[reg].is_live_ = false;
-        }
-        break;
-
-      case DBG_RESTART_LOCAL:
-        reg = DecodeUnsignedLeb128(&stream);
-        if (reg > code_item->registers_size_) {
-          LOG(ERROR) << "invalid stream - reg > reg size (" << reg << " > "
-                     << code_item->registers_size_ << ") in " << GetLocation();
-          return;
-        }
-
-        if (need_locals) {
-          if (local_in_reg[reg].name_ == nullptr || local_in_reg[reg].descriptor_ == nullptr) {
-            LOG(ERROR) << "invalid stream - no name or descriptor in " << GetLocation();
-            return;
-          }
-
-          // If the register is live, the "restart" is superfluous,
-          // and we don't want to mess with the existing start address.
-          if (!local_in_reg[reg].is_live_) {
-            local_in_reg[reg].start_address_ = address;
-            local_in_reg[reg].is_live_ = true;
-          }
-        }
-        break;
-
+      }
       case DBG_SET_PROLOGUE_END:
       case DBG_SET_EPILOGUE_BEGIN:
-      case DBG_SET_FILE:
         break;
+      case DBG_SET_FILE:
+        DecodeUnsignedLeb128P1(&stream);  // name.
+        break;
+      default:
+        address += (opcode - DBG_FIRST_SPECIAL) / DBG_LINE_RANGE;
+        break;
+    }
+  }
+}
 
+bool DexFile::DecodeDebugPositionInfo(const CodeItem* code_item, DexDebugNewPositionCb position_cb,
+                                      void* context) const {
+  DCHECK(position_cb != nullptr);
+  if (code_item == nullptr) {
+    return false;
+  }
+  const uint8_t* stream = GetDebugInfoStream(code_item);
+  if (stream == nullptr) {
+    return false;
+  }
+
+  PositionInfo entry = PositionInfo();
+  entry.line_ = DecodeUnsignedLeb128(&stream);
+  uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
+  for (uint32_t i = 0; i < parameters_size; ++i) {
+    DecodeUnsignedLeb128P1(&stream);  // Parameter name.
+  }
+
+  for (;;)  {
+    uint8_t opcode = *stream++;
+    switch (opcode) {
+      case DBG_END_SEQUENCE:
+        return true;  // end of stream.
+      case DBG_ADVANCE_PC:
+        entry.address_ += DecodeUnsignedLeb128(&stream);
+        break;
+      case DBG_ADVANCE_LINE:
+        entry.line_ += DecodeSignedLeb128(&stream);
+        break;
+      case DBG_START_LOCAL:
+        DecodeUnsignedLeb128(&stream);  // reg.
+        DecodeUnsignedLeb128P1(&stream);  // name.
+        DecodeUnsignedLeb128P1(&stream);  // descriptor.
+        break;
+      case DBG_START_LOCAL_EXTENDED:
+        DecodeUnsignedLeb128(&stream);  // reg.
+        DecodeUnsignedLeb128P1(&stream);  // name.
+        DecodeUnsignedLeb128P1(&stream);  // descriptor.
+        DecodeUnsignedLeb128P1(&stream);  // signature.
+        break;
+      case DBG_END_LOCAL:
+      case DBG_RESTART_LOCAL:
+        DecodeUnsignedLeb128(&stream);  // reg.
+        break;
+      case DBG_SET_PROLOGUE_END:
+        entry.prologue_end_ = true;
+        break;
+      case DBG_SET_EPILOGUE_BEGIN:
+        entry.epilogue_begin_ = true;
+        break;
+      case DBG_SET_FILE: {
+        uint32_t name_idx = DecodeUnsignedLeb128P1(&stream);
+        entry.source_file_ = StringDataByIdx(name_idx);
+        break;
+      }
       default: {
         int adjopcode = opcode - DBG_FIRST_SPECIAL;
-
-        address += adjopcode / DBG_LINE_RANGE;
-        line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
-
-        if (position_cb != nullptr) {
-          if (position_cb(context, address, line)) {
-            // early exit
-            return;
-          }
+        entry.address_ += adjopcode / DBG_LINE_RANGE;
+        entry.line_ += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
+        if (position_cb(context, entry)) {
+          return true;  // early exit.
         }
+        entry.prologue_end_ = false;
+        entry.epilogue_begin_ = false;
         break;
       }
     }
   }
 }
 
-void DexFile::DecodeDebugInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
-                              DexDebugNewPositionCb position_cb, DexDebugNewLocalCb local_cb,
-                              void* context) const {
-  DCHECK(code_item != nullptr);
-  const uint8_t* stream = GetDebugInfoStream(code_item);
-  std::unique_ptr<LocalInfo[]> local_in_reg(local_cb != nullptr ?
-                                      new LocalInfo[code_item->registers_size_] :
-                                      nullptr);
-  if (stream != nullptr) {
-    DecodeDebugInfo0(code_item, is_static, method_idx, position_cb, local_cb, context, stream,
-                     &local_in_reg[0]);
-  }
-  for (int reg = 0; reg < code_item->registers_size_; reg++) {
-    InvokeLocalCbIfLive(context, reg, code_item->insns_size_in_code_units_, &local_in_reg[0],
-                        local_cb);
-  }
-}
-
-bool DexFile::LineNumForPcCb(void* raw_context, uint32_t address, uint32_t line_num) {
+bool DexFile::LineNumForPcCb(void* raw_context, const PositionInfo& entry) {
   LineNumFromPcContext* context = reinterpret_cast<LineNumFromPcContext*>(raw_context);
 
   // We know that this callback will be called in
   // ascending address order, so keep going until we find
   // a match or we've just gone past it.
-  if (address > context->address_) {
+  if (entry.address_ > context->address_) {
     // The line number from the previous positions callback
     // will be the final result.
     return true;
   } else {
-    context->line_num_ = line_num;
-    return address == context->address_;
+    context->line_num_ = entry.line_;
+    return entry.address_ == context->address_;
   }
 }
 
@@ -1364,8 +1383,11 @@
   if (annotation_item == nullptr) {
     return nullptr;
   }
-  mirror::Object* obj = GetAnnotationValue(
-      klass, annotation_item, "value", NullHandle<mirror::Class>(), kDexAnnotationType);
+  mirror::Object* obj = GetAnnotationValue(klass,
+                                           annotation_item,
+                                           "value",
+                                           ScopedNullHandle<mirror::Class>(),
+                                           kDexAnnotationType);
   if (obj == nullptr) {
     return nullptr;
   }
@@ -1391,8 +1413,11 @@
     return nullptr;
   }
   AnnotationValue annotation_value;
-  if (!ProcessAnnotationValue(
-      klass, &annotation, &annotation_value, NullHandle<mirror::Class>(), kAllRaw)) {
+  if (!ProcessAnnotationValue(klass,
+                              &annotation,
+                              &annotation_value,
+                              ScopedNullHandle<mirror::Class>(),
+                              kAllRaw)) {
     return nullptr;
   }
   if (annotation_value.type_ != kDexAnnotationMethod) {
@@ -1420,7 +1445,7 @@
     return nullptr;
   }
   return GetAnnotationValue(
-      klass, annotation_item, "value", NullHandle<mirror::Class>(), kDexAnnotationMethod);
+      klass, annotation_item, "value", ScopedNullHandle<mirror::Class>(), kDexAnnotationMethod);
 }
 
 bool DexFile::GetInnerClass(Handle<mirror::Class> klass, mirror::String** name) const {
@@ -1438,8 +1463,11 @@
     return false;
   }
   AnnotationValue annotation_value;
-  if (!ProcessAnnotationValue(
-      klass, &annotation, &annotation_value, NullHandle<mirror::Class>(), kAllObjects)) {
+  if (!ProcessAnnotationValue(klass,
+                              &annotation,
+                              &annotation_value,
+                              ScopedNullHandle<mirror::Class>(),
+                              kAllObjects)) {
     return false;
   }
   if (annotation_value.type_ != kDexAnnotationNull &&
@@ -1465,8 +1493,11 @@
     return false;
   }
   AnnotationValue annotation_value;
-  if (!ProcessAnnotationValue(
-      klass, &annotation, &annotation_value, NullHandle<mirror::Class>(), kAllRaw)) {
+  if (!ProcessAnnotationValue(klass,
+                              &annotation,
+                              &annotation_value,
+                              ScopedNullHandle<mirror::Class>(),
+                              kAllRaw)) {
     return false;
   }
   if (annotation_value.type_ != kDexAnnotationInt) {
@@ -1870,10 +1901,10 @@
         Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
         ArtField* enum_field = Runtime::Current()->GetClassLinker()->ResolveField(
             klass->GetDexFile(), index, dex_cache, class_loader, true);
-        Handle<mirror::Class> field_class(hs.NewHandle(enum_field->GetDeclaringClass()));
         if (enum_field == nullptr) {
           return false;
         } else {
+          Handle<mirror::Class> field_class(hs.NewHandle(enum_field->GetDeclaringClass()));
           Runtime::Current()->GetClassLinker()->EnsureInitialized(self, field_class, true, true);
           element_object = enum_field->GetObject(field_class.Get());
           set_object = true;
@@ -2227,13 +2258,48 @@
 }
 
 EncodedStaticFieldValueIterator::EncodedStaticFieldValueIterator(
-    const DexFile& dex_file, Handle<mirror::DexCache>* dex_cache,
-    Handle<mirror::ClassLoader>* class_loader, ClassLinker* linker,
+    const DexFile& dex_file,
     const DexFile::ClassDef& class_def)
-    : dex_file_(dex_file), dex_cache_(dex_cache), class_loader_(class_loader), linker_(linker),
-      array_size_(), pos_(-1), type_(kByte) {
-  DCHECK(dex_cache != nullptr);
-  DCHECK(class_loader != nullptr);
+    : EncodedStaticFieldValueIterator(dex_file,
+                                      nullptr,
+                                      nullptr,
+                                      nullptr,
+                                      class_def,
+                                      -1,
+                                      kByte) {
+}
+
+EncodedStaticFieldValueIterator::EncodedStaticFieldValueIterator(
+    const DexFile& dex_file,
+    Handle<mirror::DexCache>* dex_cache,
+    Handle<mirror::ClassLoader>* class_loader,
+    ClassLinker* linker,
+    const DexFile::ClassDef& class_def)
+    : EncodedStaticFieldValueIterator(dex_file,
+                                      dex_cache, class_loader,
+                                      linker,
+                                      class_def,
+                                      -1,
+                                      kByte) {
+  DCHECK(dex_cache_ != nullptr);
+  DCHECK(class_loader_ != nullptr);
+}
+
+EncodedStaticFieldValueIterator::EncodedStaticFieldValueIterator(
+    const DexFile& dex_file,
+    Handle<mirror::DexCache>* dex_cache,
+    Handle<mirror::ClassLoader>* class_loader,
+    ClassLinker* linker,
+    const DexFile::ClassDef& class_def,
+    size_t pos,
+    ValueType type)
+    : dex_file_(dex_file),
+      dex_cache_(dex_cache),
+      class_loader_(class_loader),
+      linker_(linker),
+      array_size_(),
+      pos_(pos),
+      type_(type) {
   ptr_ = dex_file.GetEncodedStaticFieldValuesArray(class_def);
   if (ptr_ == nullptr) {
     array_size_ = 0;
@@ -2307,6 +2373,8 @@
 
 template<bool kTransactionActive>
 void EncodedStaticFieldValueIterator::ReadValueToField(ArtField* field) const {
+  DCHECK(dex_cache_ != nullptr);
+  DCHECK(class_loader_ != nullptr);
   switch (type_) {
     case kBoolean: field->SetBoolean<kTransactionActive>(field->GetDeclaringClass(), jval_.z);
         break;
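
For reference, the rewritten decoders above replace the old monolithic DecodeDebugInfo with two entry points that report results through struct-based callbacks. Below is a minimal sketch of a position callback under the new interface; the PcToLineMap context type is hypothetical and only illustrates the void* context plumbing, while the callback signature and early-exit convention come from the change itself.

    // Sketch only. PcToLineMap is a hypothetical context type.
    #include <cstdint>
    #include <map>
    #include "dex_file.h"  // For art::DexFile::PositionInfo.

    struct PcToLineMap {
      std::map<uint32_t, uint32_t> entries;  // address (16-bit code units) -> source line.
    };

    static bool CollectPositionsCb(void* raw_context, const art::DexFile::PositionInfo& entry) {
      PcToLineMap* map = reinterpret_cast<PcToLineMap*>(raw_context);
      map->entries[entry.address_] = entry.line_;
      return false;  // Returning true would stop the decoder early.
    }

    // Usage sketch: dex_file->DecodeDebugPositionInfo(code_item, CollectPositionsCb, &map);
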
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 1e44f50..8a3db6c 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -819,20 +819,50 @@
     }
   }
 
+  struct PositionInfo {
+    PositionInfo()
+        : address_(0),
+          line_(0),
+          source_file_(nullptr),
+          prologue_end_(false),
+          epilogue_begin_(false) {
+    }
+
+    uint32_t address_;  // In 16-bit code units.
+    uint32_t line_;  // Source code line number starting at 1.
+    const char* source_file_;  // nullptr if the file from ClassDef still applies.
+    bool prologue_end_;
+    bool epilogue_begin_;
+  };
+
   // Callback for "new position table entry".
   // Returning true causes the decoder to stop early.
-  typedef bool (*DexDebugNewPositionCb)(void* context, uint32_t address, uint32_t line_num);
+  typedef bool (*DexDebugNewPositionCb)(void* context, const PositionInfo& entry);
 
-  // Callback for "new locals table entry". "signature" is an empty string
-  // if no signature is available for an entry.
-  typedef void (*DexDebugNewLocalCb)(void* context, uint16_t reg,
-                                     uint32_t start_address,
-                                     uint32_t end_address,
-                                     const char* name,
-                                     const char* descriptor,
-                                     const char* signature);
+  struct LocalInfo {
+    LocalInfo()
+        : name_(nullptr),
+          descriptor_(nullptr),
+          signature_(nullptr),
+          start_address_(0),
+          end_address_(0),
+          reg_(0),
+          is_live_(false) {
+    }
 
-  static bool LineNumForPcCb(void* context, uint32_t address, uint32_t line_num);
+    const char* name_;  // E.g., list.  It can be nullptr if unknown.
+    const char* descriptor_;  // E.g., Ljava/util/LinkedList;
+    const char* signature_;  // E.g., java.util.LinkedList<java.lang.Integer>
+    uint32_t start_address_;  // PC location where the local is first defined.
+    uint32_t end_address_;  // PC location where the local is no longer defined.
+    uint16_t reg_;  // Dex register which stores the value.
+    bool is_live_;  // Is the local defined and live.
+  };
+
+  // Callback for "new locals table entry".
+  typedef void (*DexDebugNewLocalCb)(void* context, const LocalInfo& entry);
+
+  static bool LineNumForPcCb(void* context, const PositionInfo& entry);
 
   const AnnotationsDirectoryItem* GetAnnotationsDirectory(const ClassDef& class_def) const {
     if (class_def.annotations_off_ == 0) {
@@ -1044,21 +1074,6 @@
     DBG_LINE_RANGE           = 15,
   };
 
-  struct LocalInfo {
-    LocalInfo()
-        : name_(nullptr), descriptor_(nullptr), signature_(nullptr), start_address_(0),
-          is_live_(false) {}
-
-    const char* name_;  // E.g., list
-    const char* descriptor_;  // E.g., Ljava/util/LinkedList;
-    const char* signature_;  // E.g., java.util.LinkedList<java.lang.Integer>
-    uint16_t start_address_;  // PC location where the local is first defined.
-    bool is_live_;  // Is the local defined and live.
-
-   private:
-    DISALLOW_COPY_AND_ASSIGN(LocalInfo);
-  };
-
   struct LineNumFromPcContext {
     LineNumFromPcContext(uint32_t address, uint32_t line_num)
         : address_(address), line_num_(line_num) {}
@@ -1068,15 +1083,6 @@
     DISALLOW_COPY_AND_ASSIGN(LineNumFromPcContext);
   };
 
-  void InvokeLocalCbIfLive(void* context, int reg, uint32_t end_address,
-                           LocalInfo* local_in_reg, DexDebugNewLocalCb local_cb) const {
-    if (local_cb != nullptr && local_in_reg[reg].is_live_) {
-      local_cb(context, reg, local_in_reg[reg].start_address_, end_address,
-          local_in_reg[reg].name_, local_in_reg[reg].descriptor_,
-          local_in_reg[reg].signature_ != nullptr ? local_in_reg[reg].signature_ : "");
-    }
-  }
-
   // Determine the source file line number based on the program counter.
   // "pc" is an offset, in 16-bit units, from the start of the method's code.
   //
@@ -1088,9 +1094,13 @@
   int32_t GetLineNumFromPC(ArtMethod* method, uint32_t rel_pc) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void DecodeDebugInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
-                       DexDebugNewPositionCb position_cb, DexDebugNewLocalCb local_cb,
-                       void* context) const;
+  // Returns false if there is no debugging information or if it cannot be decoded.
+  bool DecodeDebugLocalInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
+                            DexDebugNewLocalCb local_cb, void* context) const;
+
+  // Returns false if there is no debugging information or if it cannot be decoded.
+  bool DecodeDebugPositionInfo(const CodeItem* code_item, DexDebugNewPositionCb position_cb,
+                               void* context) const;
 
   const char* GetSourceFile(const ClassDef& class_def) const {
     if (class_def.source_file_idx_ == 0xffffffff) {
@@ -1200,10 +1210,6 @@
   // Returns true if the header magic and version numbers are of the expected values.
   bool CheckMagicAndVersion(std::string* error_msg) const;
 
-  void DecodeDebugInfo0(const CodeItem* code_item, bool is_static, uint32_t method_idx,
-      DexDebugNewPositionCb position_cb, DexDebugNewLocalCb local_cb,
-      void* context, const uint8_t* stream, LocalInfo* local_in_reg) const;
-
   // Check whether a location denotes a multidex dex file. This is a very simple check: returns
   // whether the string contains the separator character.
   static bool IsMultiDexLocation(const char* location);
@@ -1275,6 +1281,7 @@
     }
   }
   bool HasNext() const { return pos_ < size_; }
+  size_t Size() const { return size_; }
   void Next() { ++pos_; }
   uint16_t GetTypeIdx() {
     return type_list_->GetTypeItem(pos_).type_idx_;
@@ -1510,9 +1517,17 @@
 
 class EncodedStaticFieldValueIterator {
  public:
-  EncodedStaticFieldValueIterator(const DexFile& dex_file, Handle<mirror::DexCache>* dex_cache,
+  // A constructor for static tools. You cannot call
+  // ReadValueToField() on an object created by this constructor.
+  EncodedStaticFieldValueIterator(const DexFile& dex_file,
+                                  const DexFile::ClassDef& class_def);
+
+  // A constructor meant to be called from runtime code.
+  EncodedStaticFieldValueIterator(const DexFile& dex_file,
+                                  Handle<mirror::DexCache>* dex_cache,
                                   Handle<mirror::ClassLoader>* class_loader,
-                                  ClassLinker* linker, const DexFile::ClassDef& class_def)
+                                  ClassLinker* linker,
+                                  const DexFile::ClassDef& class_def)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<bool kTransactionActive>
@@ -1541,7 +1556,18 @@
     kBoolean = 0x1f
   };
 
+  ValueType GetValueType() const { return type_; }
+  const jvalue& GetJavaValue() const { return jval_; }
+
  private:
+  EncodedStaticFieldValueIterator(const DexFile& dex_file,
+                                  Handle<mirror::DexCache>* dex_cache,
+                                  Handle<mirror::ClassLoader>* class_loader,
+                                  ClassLinker* linker,
+                                  const DexFile::ClassDef& class_def,
+                                  size_t pos,
+                                  ValueType type);
+
   static constexpr uint8_t kEncodedValueTypeMask = 0x1f;  // 0b11111
   static constexpr uint8_t kEncodedValueArgShift = 5;
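
Both debug-info decoders fall through to the default: arm for special opcodes, which pack an address delta and a line delta into a single byte. A self-contained worked example of that arithmetic follows, assuming the standard dex debug-info constants (DBG_FIRST_SPECIAL = 0x0a and DBG_LINE_BASE = -4 per the dex spec; DBG_LINE_RANGE = 15 is visible in the enum above):

    #include <cstdio>

    int main() {
      const int kDbgFirstSpecial = 0x0a;  // First special opcode (dex spec).
      const int kDbgLineBase = -4;        // Smallest line delta (dex spec).
      const int kDbgLineRange = 15;       // Matches DBG_LINE_RANGE above.
      int opcode = 0x53;                  // Example special opcode.
      int adjopcode = opcode - kDbgFirstSpecial;  // 73
      printf("address += %d code units\n", adjopcode / kDbgLineRange);        // 4
      printf("line    += %d\n", kDbgLineBase + (adjopcode % kDbgLineRange));  // 9
      return 0;
    }
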
 
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 0a167bb..796701d 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -206,7 +206,7 @@
   uint32_t checksum;
   ScopedObjectAccess soa(Thread::Current());
   std::string error_msg;
-  EXPECT_TRUE(DexFile::GetChecksum(GetLibCoreDexFileName().c_str(), &checksum, &error_msg))
+  EXPECT_TRUE(DexFile::GetChecksum(GetLibCoreDexFileNames()[0].c_str(), &checksum, &error_msg))
       << error_msg;
   EXPECT_EQ(java_lang_dex_file_->GetLocationChecksum(), checksum);
 }
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 440d696..727f4fc 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -2508,11 +2508,12 @@
                                   method_access_flags);
         return false;
       }
-      // Abstract methods must be in an abstract class or interface.
+      // Abstract methods should be in an abstract class or interface.
       if ((class_access_flags & (kAccInterface | kAccAbstract)) == 0) {
-        *error_msg = StringPrintf("Method %" PRIu32 " is abstract, but the declaring class "
-                                  "is neither abstract nor an interface", method_index);
-        return false;
+        LOG(WARNING) << "Method " << PrettyMethod(method_index, *dex_file_)
+                     << " is abstract, but the declaring class is neither abstract nor an "
+                     << "interface in dex file "
+                     << dex_file_->GetLocation();
       }
     }
     // Interfaces are special.
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 438b6b8..3f62124 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -189,8 +189,17 @@
         case CONST_STRING:
           if (file != nullptr) {
             uint32_t string_idx = VRegB_21c();
-            os << StringPrintf("const-string v%d, %s // string@%d", VRegA_21c(),
-                               PrintableString(file->StringDataByIdx(string_idx)).c_str(), string_idx);
+            if (string_idx < file->NumStringIds()) {
+              os << StringPrintf("const-string v%d, %s // string@%d",
+                                 VRegA_21c(),
+                                 PrintableString(file->StringDataByIdx(string_idx)).c_str(),
+                                 string_idx);
+            } else {
+              os << StringPrintf("const-string v%d, <<invalid-string-idx-%d>> // string@%d",
+                                 VRegA_21c(),
+                                 string_idx,
+                                 string_idx);
+            }
             break;
           }
           FALLTHROUGH_INTENDED;
@@ -348,9 +357,19 @@
       if (Opcode() == CONST_STRING_JUMBO) {
         uint32_t string_idx = VRegB_31c();
         if (file != nullptr) {
-          os << StringPrintf("%s v%d, %s // string@%d", opcode, VRegA_31c(),
-                             PrintableString(file->StringDataByIdx(string_idx)).c_str(),
-                             string_idx);
+          if (string_idx < file->NumStringIds()) {
+            os << StringPrintf("%s v%d, %s // string@%d",
+                               opcode,
+                               VRegA_31c(),
+                               PrintableString(file->StringDataByIdx(string_idx)).c_str(),
+                               string_idx);
+          } else {
+            os << StringPrintf("%s v%d, <<invalid-string-idx-%d>> // string@%d",
+                               opcode,
+                               VRegA_31c(),
+                               string_idx,
+                               string_idx);
+          }
         } else {
           os << StringPrintf("%s v%d, string@%d", opcode, VRegA_31c(), string_idx);
         }
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 2819670..57d623e 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -27,89 +27,12 @@
 #include "base/unix_file/fd_file.h"
 #include "elf_file_impl.h"
 #include "elf_utils.h"
+#include "jit/debugger_interface.h"
 #include "leb128.h"
 #include "utils.h"
 
 namespace art {
 
-// -------------------------------------------------------------------
-// Binary GDB JIT Interface as described in
-//   http://sourceware.org/gdb/onlinedocs/gdb/Declarations.html
-extern "C" {
-  typedef enum {
-    JIT_NOACTION = 0,
-    JIT_REGISTER_FN,
-    JIT_UNREGISTER_FN
-  } JITAction;
-
-  struct JITCodeEntry {
-    JITCodeEntry* next_;
-    JITCodeEntry* prev_;
-    const uint8_t *symfile_addr_;
-    uint64_t symfile_size_;
-  };
-
-  struct JITDescriptor {
-    uint32_t version_;
-    uint32_t action_flag_;
-    JITCodeEntry* relevant_entry_;
-    JITCodeEntry* first_entry_;
-  };
-
-  // GDB will place breakpoint into this function.
-  // To prevent GCC from inlining or removing it we place noinline attribute
-  // and inline assembler statement inside.
-  void __attribute__((noinline)) __jit_debug_register_code();
-  void __attribute__((noinline)) __jit_debug_register_code() {
-    __asm__("");
-  }
-
-  // GDB will inspect contents of this descriptor.
-  // Static initialization is necessary to prevent GDB from seeing
-  // uninitialized descriptor.
-  JITDescriptor __jit_debug_descriptor = { 1, JIT_NOACTION, nullptr, nullptr };
-}
-
-
-static JITCodeEntry* CreateCodeEntry(const uint8_t *symfile_addr,
-                                     uintptr_t symfile_size) {
-  JITCodeEntry* entry = new JITCodeEntry;
-  entry->symfile_addr_ = symfile_addr;
-  entry->symfile_size_ = symfile_size;
-  entry->prev_ = nullptr;
-
-  // TODO: Do we need a lock here?
-  entry->next_ = __jit_debug_descriptor.first_entry_;
-  if (entry->next_ != nullptr) {
-    entry->next_->prev_ = entry;
-  }
-  __jit_debug_descriptor.first_entry_ = entry;
-  __jit_debug_descriptor.relevant_entry_ = entry;
-
-  __jit_debug_descriptor.action_flag_ = JIT_REGISTER_FN;
-  __jit_debug_register_code();
-  return entry;
-}
-
-
-static void UnregisterCodeEntry(JITCodeEntry* entry) {
-  // TODO: Do we need a lock here?
-  if (entry->prev_ != nullptr) {
-    entry->prev_->next_ = entry->next_;
-  } else {
-    __jit_debug_descriptor.first_entry_ = entry->next_;
-  }
-
-  if (entry->next_ != nullptr) {
-    entry->next_->prev_ = entry->prev_;
-  }
-
-  __jit_debug_descriptor.relevant_entry_ = entry;
-  __jit_debug_descriptor.action_flag_ = JIT_UNREGISTER_FN;
-  __jit_debug_register_code();
-  delete entry;
-}
-
 template <typename ElfTypes>
 ElfFileImpl<ElfTypes>::ElfFileImpl(File* file, bool writable,
                                    bool program_header_only,
@@ -352,7 +275,7 @@
   delete dynsym_symbol_table_;
   delete jit_elf_image_;
   if (jit_gdb_entry_) {
-    UnregisterCodeEntry(jit_gdb_entry_);
+    DeleteJITCodeEntry(jit_gdb_entry_);
   }
 }
 
@@ -1511,7 +1434,7 @@
     return;
   }
 
-  jit_gdb_entry_ = CreateCodeEntry(all.Begin(), all.Size());
+  jit_gdb_entry_ = CreateJITCodeEntry(all.Begin(), all.Size());
   gdb_file_mapping_.reset(all_ptr.release());
 }
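
The GDB JIT interface boilerplate deleted above now lives behind jit/debugger_interface.h. A sketch of the caller-side pattern, assuming the relocated helpers keep the signatures used in this file (CreateJITCodeEntry taking a symfile address and size, DeleteJITCodeEntry taking the returned entry):

    #include "jit/debugger_interface.h"

    // Register an in-memory ELF symfile so an attached GDB can see JITed code.
    // Publishing the entry ends in a call to __jit_debug_register_code(), the
    // function GDB breakpoints to learn about new code.
    JITCodeEntry* RegisterSymfile(const uint8_t* symfile_addr, uint64_t symfile_size) {
      return CreateJITCodeEntry(symfile_addr, symfile_size);
    }

    void UnregisterSymfile(JITCodeEntry* entry) {
      DeleteJITCodeEntry(entry);  // Unlinks the entry and notifies GDB again.
    }
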
 
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 21e4e44..ba2fb94 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -68,7 +68,7 @@
     class_loader.Assign(caller->GetClassLoader());
   }
 
-  return class_linker->ResolveMethod(
+  return class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
       *outer_method->GetDexFile(), method_index, dex_cache, class_loader, nullptr, invoke_type);
 }
 
@@ -401,7 +401,10 @@
     mirror::Object* null_this = nullptr;
     HandleWrapper<mirror::Object> h_this(
         hs.NewHandleWrapper(type == kStatic ? &null_this : this_object));
-    resolved_method = class_linker->ResolveMethod(self, method_idx, referrer, type);
+    constexpr ClassLinker::ResolveMode resolve_mode =
+        access_check ? ClassLinker::kForceICCECheck
+                     : ClassLinker::kNoICCECheckForCache;
+    resolved_method = class_linker->ResolveMethod<resolve_mode>(self, method_idx, referrer, type);
   }
   if (UNLIKELY(resolved_method == nullptr)) {
     DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
@@ -598,8 +601,12 @@
   } else if (type == kStatic || type == kDirect) {
     return resolved_method;
   } else if (type == kSuper) {
-    return referrer->GetDeclaringClass()->GetSuperClass()->GetVTableEntry(
-        resolved_method->GetMethodIndex(), sizeof(void*));
+    mirror::Class* super_class = referrer->GetDeclaringClass()->GetSuperClass();
+    if (resolved_method->GetMethodIndex() >= super_class->GetVTableLength()) {
+      // The super class does not have the method.
+      return nullptr;
+    }
+    return super_class->GetVTableEntry(resolved_method->GetMethodIndex(), sizeof(void*));
   } else {
     DCHECK(type == kVirtual);
     return this_object->GetClass()->GetVTableEntry(
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 87e29ae..915d9ab 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -306,11 +306,13 @@
       mirror::Method* interface_method = soa.Decode<mirror::Method*>(interface_method_jobj);
       ArtMethod* proxy_method = rcvr->GetClass()->FindVirtualMethodForInterface(
           interface_method->GetArtMethod(), sizeof(void*));
-      auto* virtual_methods = proxy_class->GetVirtualMethodsPtr();
+      auto virtual_methods = proxy_class->GetVirtualMethodsSlice(sizeof(void*));
       size_t num_virtuals = proxy_class->NumVirtualMethods();
       size_t method_size = ArtMethod::Size(sizeof(void*));
+      // Rely on the fact that the methods are contiguous to determine the index of the method in
+      // the slice.
       int throws_index = (reinterpret_cast<uintptr_t>(proxy_method) -
-          reinterpret_cast<uintptr_t>(virtual_methods)) / method_size;
+          reinterpret_cast<uintptr_t>(&virtual_methods.At(0))) / method_size;
       CHECK_LT(throws_index, static_cast<int>(num_virtuals));
       mirror::ObjectArray<mirror::Class>* declared_exceptions =
           proxy_class->GetThrows()->Get(throws_index);
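
The throws_index computation above relies on the virtual methods being laid out contiguously. Distilled to a standalone helper (names here are illustrative, not from the change):

    #include <cstddef>
    #include <cstdint>

    // Index of `elem` within a contiguous array of `elem_size`-byte elements
    // starting at `base`: the byte distance divided by the element size.
    size_t IndexInContiguousStorage(const void* base, const void* elem, size_t elem_size) {
      return (reinterpret_cast<uintptr_t>(elem) -
              reinterpret_cast<uintptr_t>(base)) / elem_size;
    }
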
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index dfd9fcd..c019cae 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -52,7 +52,7 @@
   // Before deoptimizing to interpreter, we must push the deoptimization context.
   JValue return_value;
   return_value.SetJ(0);  // we never deoptimize from compiled code with an invoke result.
-  self->PushDeoptimizationContext(return_value, false, self->GetException());
+  self->PushDeoptimizationContext(return_value, false, /* from_code */ true, self->GetException());
 
   QuickExceptionHandler exception_handler(self, true);
   exception_handler.DeoptimizeSingleFrame();
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 27865e3..f5b68fa 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -79,11 +79,17 @@
 // functions directly.  For x86 and x86-64, compilers need a wrapper
 // assembly function, to handle mismatch in ABI.
 
+// Mark the heap reference `obj`. This entry point is used by read
+// barrier fast path implementations generated by the compiler to mark
+// an object that is referenced by a field of a gray object.
+extern "C" mirror::Object* artReadBarrierMark(mirror::Object* obj)
+    SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR;
+
 // Read barrier entrypoint for heap references.
-// This is the read barrier slow path for instance and static fields and reference-type arrays.
-// TODO: Currently the read barrier does not have a fast path for compilers to directly generate.
-// Ideally the slow path should only take one parameter "ref".
-extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref, mirror::Object* obj,
+// This is the read barrier slow path for instance and static fields
+// and reference type arrays.
+extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+                                              mirror::Object* obj,
                                               uint32_t offset)
     SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR;
 
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index ee7b986..faa4747 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -86,6 +86,23 @@
   V(CmpgFloat, int32_t, float, float) \
   V(CmplDouble, int32_t, double, double) \
   V(CmplFloat, int32_t, float, float) \
+  V(Cos, double, double) \
+  V(Sin, double, double) \
+  V(Acos, double, double) \
+  V(Asin, double, double) \
+  V(Atan, double, double) \
+  V(Atan2, double, double, double) \
+  V(Cbrt, double, double) \
+  V(Cosh, double, double) \
+  V(Exp, double, double) \
+  V(Expm1, double, double) \
+  V(Hypot, double, double, double) \
+  V(Log, double, double) \
+  V(Log10, double, double) \
+  V(NextAfter, double, double, double) \
+  V(Sinh, double, double) \
+  V(Tan, double, double) \
+  V(Tanh, double, double) \
   V(Fmod, double, double, double) \
   V(L2d, double, int64_t) \
   V(Fmodf, float, float, float) \
@@ -146,6 +163,7 @@
   V(NewStringFromStringBuilder, void) \
 \
   V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*) \
+  V(ReadBarrierMark, mirror::Object*, mirror::Object*) \
   V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t) \
   V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*)
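
The new math rows follow this list's X-macro convention: each V(name, return-type, argument-types...) row is expanded by whatever macro a consumer passes in, so adding a row extends every consumer (the entrypoint struct, the offset test below, assembly stubs) at once. A sketch of one such consumer, assuming the enclosing list macro is QUICK_ENTRYPOINT_LIST as the header's name suggests:

    #include "entrypoints/quick/quick_entrypoints_list.h"

    // Expand each V(...) row into a function-pointer member named pName.
    #define ENTRYPOINT_FIELD(name, rettype, ...) rettype (*p##name)(__VA_ARGS__);

    struct SketchQuickEntryPoints {
      QUICK_ENTRYPOINT_LIST(ENTRYPOINT_FIELD)  // e.g. double (*pCos)(double);
    };
    #undef ENTRYPOINT_FIELD
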
 
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 7ec5fc5..1850254 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -559,8 +559,11 @@
   return -1;  // failure
 }
 
-// TODO: Currently the read barrier does not have a fast path. Ideally the slow path should only
-// take one parameter "ref", which is given by the fast path.
+extern "C" mirror::Object* artReadBarrierMark(mirror::Object* obj) {
+  DCHECK(kEmitCompilerReadBarrier);
+  return ReadBarrier::Mark(obj);
+}
+
 extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref ATTRIBUTE_UNUSED,
                                               mirror::Object* obj,
                                               uint32_t offset) {
@@ -571,15 +574,14 @@
   constexpr ReadBarrierOption kReadBarrierOption =
       kUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
   mirror::Object* result =
-      ReadBarrier::Barrier<mirror::Object, kReadBarrierOption, true>(obj,
-                                                                     MemberOffset(offset),
-                                                                     ref_addr);
+      ReadBarrier::Barrier<mirror::Object, kReadBarrierOption>(obj,
+                                                               MemberOffset(offset),
+                                                               ref_addr);
   return result;
 }
 
 extern "C" mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root) {
   DCHECK(kEmitCompilerReadBarrier);
-  // TODO: Pass a GcRootSource object as second argument to GcRoot::Read?
   return root->Read();
 }
 
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index abf9ac4..08c9b49 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -685,7 +685,9 @@
     }
 
     mirror::Throwable* pending_exception = nullptr;
-    self->PopDeoptimizationContext(&result, &pending_exception);
+    bool from_code = false;
+    self->PopDeoptimizationContext(&result, &pending_exception, /* out */ &from_code);
+    CHECK(from_code);
 
     // Push a transition back into managed code onto the linked list in thread.
     self->PushManagedStackFragment(&fragment);
@@ -712,7 +714,7 @@
     if (pending_exception != nullptr) {
       self->SetException(pending_exception);
     }
-    interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, &result);
+    interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, from_code, &result);
   } else {
     const char* old_cause = self->StartAssertNoThreadSuspension(
         "Building interpreter shadow frame");
@@ -754,7 +756,8 @@
   if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
     // Push the context of the deoptimization stack so we can restore the return value and the
     // exception before executing the deoptimized frames.
-    self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException());
+    self->PushDeoptimizationContext(
+        result, shorty[0] == 'L', /* from_code */ false, self->GetException());
 
     // Set special exception to cause deoptimization.
     self->SetException(Thread::GetDeoptimizationException());
@@ -1012,22 +1015,29 @@
     HandleWrapper<mirror::Object> h_receiver(
         hs.NewHandleWrapper(virtual_or_interface ? &receiver : &dummy));
     DCHECK_EQ(caller->GetDexFile(), called_method.dex_file);
-    called = linker->ResolveMethod(self, called_method.dex_method_index, caller, invoke_type);
+    called = linker->ResolveMethod<ClassLinker::kForceICCECheck>(
+        self, called_method.dex_method_index, caller, invoke_type);
   }
   const void* code = nullptr;
   if (LIKELY(!self->IsExceptionPending())) {
     // Incompatible class change should have been handled in resolve method.
     CHECK(!called->CheckIncompatibleClassChange(invoke_type))
         << PrettyMethod(called) << " " << invoke_type;
-    if (virtual_or_interface) {
-      // Refine called method based on receiver.
-      CHECK(receiver != nullptr) << invoke_type;
-
+    if (virtual_or_interface || invoke_type == kSuper) {
+      // Refine called method based on receiver for kVirtual/kInterface, and
+      // caller for kSuper.
       ArtMethod* orig_called = called;
       if (invoke_type == kVirtual) {
+        CHECK(receiver != nullptr) << invoke_type;
         called = receiver->GetClass()->FindVirtualMethodForVirtual(called, sizeof(void*));
-      } else {
+      } else if (invoke_type == kInterface) {
+        CHECK(receiver != nullptr) << invoke_type;
         called = receiver->GetClass()->FindVirtualMethodForInterface(called, sizeof(void*));
+      } else {
+        DCHECK_EQ(invoke_type, kSuper);
+        CHECK(caller != nullptr) << invoke_type;
+        called = caller->GetDeclaringClass()->GetSuperClass()->GetVTableEntry(
+            called->GetMethodIndex(), sizeof(void*));
       }
 
       CHECK(called != nullptr) << PrettyMethod(orig_called) << " "
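
The kSuper arm added above re-selects the concrete target from the superclass vtable of the caller's declaring class. The same refinement, distilled into a sketch (simplified: the null and bounds checks that the surrounding CHECKs and the entrypoint_utils change provide are omitted):

    // Given the resolved super-call target, pick the implementation from the
    // superclass of the caller's declaring class, by vtable index.
    ArtMethod* RefineSuperCall(ArtMethod* caller, ArtMethod* resolved) {
      mirror::Class* super_class = caller->GetDeclaringClass()->GetSuperClass();
      return super_class->GetVTableEntry(resolved->GetMethodIndex(), sizeof(void*));
    }
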
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
index 4e85913..01e22a4 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
@@ -31,6 +31,13 @@
     options->push_back(std::make_pair("imageinstructionset", "x86_64"));
   }
 
+  // Do not do any of the finalization. We don't want to run any code and we don't need the
+  // heap prepared; it would actually be a problem given that the instruction set is set to
+  // x86_64 in SetUpRuntimeOptions.
+  void FinalizeSetup() OVERRIDE {
+    ASSERT_EQ(InstructionSet::kX86_64, Runtime::Current()->GetInstructionSet());
+  }
+
   static ArtMethod* CreateCalleeSaveMethod(InstructionSet isa, Runtime::CalleeSaveType type)
       NO_THREAD_SAFETY_ANALYSIS {
     Runtime* r = Runtime::Current();
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 8587ede..dc9f14c 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -223,7 +223,24 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCmpgDouble, pCmpgFloat, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCmpgFloat, pCmplDouble, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCmplDouble, pCmplFloat, sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCmplFloat, pFmod, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCmplFloat, pCos, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCos, pSin, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pSin, pAcos, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAcos, pAsin, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAsin, pAtan, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAtan, pAtan2, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAtan2, pCbrt, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCbrt, pCosh, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCosh, pExp, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pExp, pExpm1, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pExpm1, pHypot, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pHypot, pLog, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pLog, pLog10, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pLog10, pNextAfter, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNextAfter, pSinh, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pSinh, pTan, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pTan, pTanh, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pTanh, pFmod, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pFmod, pL2d, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pL2d, pFmodf, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pFmodf, pL2f, sizeof(void*));
@@ -301,7 +318,8 @@
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromStringBuilder, pReadBarrierJni,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierSlow, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierMark, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMark, pReadBarrierSlow, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierSlow, pReadBarrierForRootSlow,
                          sizeof(void*));
 
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 9c8e4df..7d00094 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1526,10 +1526,9 @@
   }
 }
 
+// The function below may be called by the mutator itself just before thread termination.
 size_t RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
   Thread* self = Thread::Current();
-  // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
-  ReaderMutexLock wmu(self, bulk_free_lock_);
   size_t free_bytes = 0U;
   for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
     MutexLock mu(self, *size_bracket_locks_[idx]);
@@ -1544,10 +1543,17 @@
       // Count the number of free slots left.
       size_t num_free_slots = thread_local_run->NumberOfFreeSlots();
       free_bytes += num_free_slots * bracketSizes[idx];
+      // The bracket index lock above guards the thread-local free list against a race with
+      // the GC thread unioning the bulk free list into the thread-local free list in BulkFree.
+      // If the run is thread-local, the GC thread updates the thread-local free list in
+      // BulkFree, and that list is merged into the free list either when the run becomes full
+      // or when the run is revoked here, so the free list ends up current. If the run is not
+      // thread-local, the GC thread merges the bulk free list in the next BulkFree.
+      // Thus there is no need to merge the bulk free list into the free list again here.
       bool dont_care;
       thread_local_run->MergeThreadLocalFreeListToFreeList(&dont_care);
       thread_local_run->SetIsThreadLocal(false);
-      thread_local_run->MergeBulkFreeListToFreeList();
       DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
       DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
       RevokeRun(self, idx, thread_local_run);
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 1cd7983..bcfcb89 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -1080,7 +1080,7 @@
                 !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())))) {
     // Leave this Reference gray in the queue so that GetReferent() will trigger a read barrier. We
     // will change it to black or white later in ReferenceQueue::DequeuePendingReference().
-    CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
+    DCHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
   } else {
     // We may occasionally leave a Reference black or white in the queue if its referent happens to
     // be concurrently marked after the Scan() call above has enqueued the Reference, in which case
@@ -1089,9 +1089,10 @@
     if (kUseBakerReadBarrier) {
       if (region_space_->IsInToSpace(to_ref)) {
         // If to-space, change from gray to white.
-        bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                           ReadBarrier::WhitePtr());
-        CHECK(success) << "Must succeed as we won the race.";
+        bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
+            ReadBarrier::GrayPtr(),
+            ReadBarrier::WhitePtr());
+        DCHECK(success) << "Must succeed as we won the race.";
         DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
       } else {
         // If non-moving space/unevac from space, change from gray
@@ -1101,9 +1102,10 @@
         // indicate non-moving objects that have been marked
         // through. Note we'd need to change from black to white
         // later (concurrently).
-        bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                           ReadBarrier::BlackPtr());
-        CHECK(success) << "Must succeed as we won the race.";
+        bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
+            ReadBarrier::GrayPtr(),
+            ReadBarrier::BlackPtr());
+        DCHECK(success) << "Must succeed as we won the race.";
         DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
       }
     }
@@ -1227,9 +1229,6 @@
  public:
   explicit ConcurrentCopyingClearBlackPtrsVisitor(ConcurrentCopying* cc)
       : collector_(cc) {}
-#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
-  NO_RETURN
-#endif
   void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_)
       SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
     DCHECK(obj != nullptr);
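
The switch from CHECK to DCHECK above pairs with the new kCasRelease template argument on AtomicSetReadBarrierPointer: a release CAS guarantees the object's field updates from Scan() become visible before the color flip does. Illustrated with std::atomic standing in for the read-barrier word (a sketch, not the actual Object API):

    #include <atomic>
    #include <cstdint>

    // Flip a read-barrier word from `gray` to `white` with release semantics:
    // prior writes to the object become visible before the new color does.
    bool FlipColor(std::atomic<uint32_t>* rb_word, uint32_t gray, uint32_t white) {
      uint32_t expected = gray;
      return rb_word->compare_exchange_strong(
          expected, white, std::memory_order_release, std::memory_order_relaxed);
    }
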
diff --git a/runtime/gc/collector/immune_spaces_test.cc b/runtime/gc/collector/immune_spaces_test.cc
index f741117..4884e66 100644
--- a/runtime/gc/collector/immune_spaces_test.cc
+++ b/runtime/gc/collector/immune_spaces_test.cc
@@ -113,7 +113,9 @@
         /*oat_data_end*/PointerToLowMemUInt32(map->End() + oat_size),
         /*oat_file_end*/PointerToLowMemUInt32(map->End() + oat_size),
         /*pointer_size*/sizeof(void*),
-        /*compile_pic*/false);
+        /*compile_pic*/false,
+        ImageHeader::kStorageModeUncompressed,
+        /*storage_size*/0u);
     return new DummyImageSpace(map.release(), live_bitmap.release());
   }
 };
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index e9497a2..99e98bb 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -367,37 +367,26 @@
                                    GetTimings());
       table->UpdateAndMarkReferences(this);
       DCHECK(GetHeap()->FindRememberedSetFromSpace(space) == nullptr);
-    } else if (collect_from_space_only_ && space->GetLiveBitmap() != nullptr) {
-      // If the space has no mod union table (the non-moving space and main spaces when the bump
-      // pointer space only collection is enabled,) then we need to scan its live bitmap or dirty
-      // cards as roots (including the objects on the live stack which have just marked in the live
-      // bitmap above in MarkAllocStackAsLive().)
-      DCHECK(space == heap_->GetNonMovingSpace() || space == heap_->GetPrimaryFreeListSpace())
-          << "Space " << space->GetName() << " "
-          << "generational_=" << generational_ << " "
-          << "collect_from_space_only_=" << collect_from_space_only_;
+    } else if ((space->IsImageSpace() || collect_from_space_only_) &&
+               space->GetLiveBitmap() != nullptr) {
+      // If the space has no mod union table (the non-moving space, app image spaces, and main
+      // spaces when the bump-pointer-space-only collection is enabled), then we need to scan
+      // its live bitmap or dirty cards as roots (including the objects on the live stack which
+      // have just been marked in the live bitmap above in MarkAllocStackAsLive()).
       accounting::RememberedSet* rem_set = GetHeap()->FindRememberedSetFromSpace(space);
-      if (kUseRememberedSet) {
+      if (!space->IsImageSpace()) {
+        DCHECK(space == heap_->GetNonMovingSpace() || space == heap_->GetPrimaryFreeListSpace())
+            << "Space " << space->GetName() << " "
+            << "generational_=" << generational_ << " "
+            << "collect_from_space_only_=" << collect_from_space_only_;
         // App images currently do not have remembered sets.
-        DCHECK((space->IsImageSpace() && space != heap_->GetBootImageSpace()) ||
-               rem_set != nullptr);
+        DCHECK_EQ(kUseRememberedSet, rem_set != nullptr);
       } else {
         DCHECK(rem_set == nullptr);
       }
       if (rem_set != nullptr) {
         TimingLogger::ScopedTiming t2("UpdateAndMarkRememberedSet", GetTimings());
         rem_set->UpdateAndMarkReferences(from_space_, this);
-        if (kIsDebugBuild) {
-          // Verify that there are no from-space references that
-          // remain in the space, that is, the remembered set (and the
-          // card table) didn't miss any from-space references in the
-          // space.
-          accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
-          SemiSpaceVerifyNoFromSpaceReferencesObjectVisitor visitor(this);
-          live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
-                                        reinterpret_cast<uintptr_t>(space->End()),
-                                        visitor);
-        }
       } else {
         TimingLogger::ScopedTiming t2("VisitLiveBits", GetTimings());
         accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
@@ -406,6 +395,17 @@
                                       reinterpret_cast<uintptr_t>(space->End()),
                                       visitor);
       }
+      if (kIsDebugBuild) {
+        // Verify that there are no from-space references that
+        // remain in the space, that is, the remembered set (and the
+        // card table) didn't miss any from-space references in the
+        // space.
+        accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+        SemiSpaceVerifyNoFromSpaceReferencesObjectVisitor visitor(this);
+        live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                      reinterpret_cast<uintptr_t>(space->End()),
+                                      visitor);
+      }
     }
   }
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index da9a79e..6d72f31 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1352,28 +1352,10 @@
   uint64_t gc_heap_end_ns = NanoTime();
   // We never move things in the native heap, so we can finish the GC at this point.
   FinishGC(self, collector::kGcTypeNone);
-  size_t native_reclaimed = 0;
 
-#ifdef __ANDROID__
-  // Only trim the native heap if we don't care about pauses.
-  if (!CareAboutPauseTimes()) {
-#if defined(USE_DLMALLOC)
-    // Trim the native heap.
-    dlmalloc_trim(0);
-    dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
-#elif defined(USE_JEMALLOC)
-    // Jemalloc does it's own internal trimming.
-#else
-    UNIMPLEMENTED(WARNING) << "Add trimming support";
-#endif
-  }
-#endif  // __ANDROID__
-  uint64_t end_ns = NanoTime();
   VLOG(heap) << "Heap trim of managed (duration=" << PrettyDuration(gc_heap_end_ns - start_ns)
-      << ", advised=" << PrettySize(managed_reclaimed) << ") and native (duration="
-      << PrettyDuration(end_ns - gc_heap_end_ns) << ", advised=" << PrettySize(native_reclaimed)
-      << ") heaps. Managed heap utilization of " << static_cast<int>(100 * managed_utilization)
-      << "%.";
+      << ", advised=" << PrettySize(managed_reclaimed) << ") heap. Managed heap utilization of "
+      << static_cast<int>(100 * managed_utilization) << "%.";
   ATRACE_END();
 }
 
diff --git a/runtime/gc/reference_queue_test.cc b/runtime/gc/reference_queue_test.cc
index ab921d9..dc23afe 100644
--- a/runtime/gc/reference_queue_test.cc
+++ b/runtime/gc/reference_queue_test.cc
@@ -35,7 +35,7 @@
   ASSERT_EQ(queue.GetLength(), 0U);
   auto ref_class = hs.NewHandle(
       Runtime::Current()->GetClassLinker()->FindClass(self, "Ljava/lang/ref/WeakReference;",
-                                                      NullHandle<mirror::ClassLoader>()));
+                                                      ScopedNullHandle<mirror::ClassLoader>()));
   ASSERT_TRUE(ref_class.Get() != nullptr);
   auto ref1(hs.NewHandle(ref_class->AllocObject(self)->AsReference()));
   ASSERT_TRUE(ref1.Get() != nullptr);
@@ -65,11 +65,11 @@
   queue.Dump(LOG(INFO));
   auto weak_ref_class = hs.NewHandle(
       Runtime::Current()->GetClassLinker()->FindClass(self, "Ljava/lang/ref/WeakReference;",
-                                                      NullHandle<mirror::ClassLoader>()));
+                                                      ScopedNullHandle<mirror::ClassLoader>()));
   ASSERT_TRUE(weak_ref_class.Get() != nullptr);
   auto finalizer_ref_class = hs.NewHandle(
       Runtime::Current()->GetClassLinker()->FindClass(self, "Ljava/lang/ref/FinalizerReference;",
-                                                      NullHandle<mirror::ClassLoader>()));
+                                                      ScopedNullHandle<mirror::ClassLoader>()));
   ASSERT_TRUE(finalizer_ref_class.Get() != nullptr);
   auto ref1(hs.NewHandle(weak_ref_class->AllocObject(self)->AsReference()));
   ASSERT_TRUE(ref1.Get() != nullptr);
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 1fe9a03..8f67c21 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -17,12 +17,12 @@
 #include "image_space.h"
 
 #include <dirent.h>
+#include <lz4.h>
+#include <random>
 #include <sys/statvfs.h>
 #include <sys/types.h>
 #include <unistd.h>
 
-#include <random>
-
 #include "art_method.h"
 #include "base/macros.h"
 #include "base/stl_util.h"
@@ -58,10 +58,7 @@
   CHECK_ALIGNED(max_delta, kPageSize);
   CHECK_LT(min_delta, max_delta);
 
-  std::default_random_engine generator;
-  generator.seed(NanoTime() * getpid());
-  std::uniform_int_distribution<int32_t> distribution(min_delta, max_delta);
-  int32_t r = distribution(generator);
+  int32_t r = GetRandomNumber<int32_t>(min_delta, max_delta);
   if (r % 2 == 0) {
     r = RoundUp(r, kPageSize);
   } else {
@@ -680,11 +677,12 @@
     *error_msg = StringPrintf("Invalid image header in '%s'", image_filename);
     return nullptr;
   }
-  // Check that the file is large enough.
-  uint64_t image_file_size = static_cast<uint64_t>(file->GetLength());
-  if (image_header.GetImageSize() > image_file_size) {
-    *error_msg = StringPrintf("Image file too small for image heap: %" PRIu64 " vs. %zu.",
-                              image_file_size, image_header.GetImageSize());
+  // Check that the file is at least as large as the header size plus the data size.
+  const uint64_t image_file_size = static_cast<uint64_t>(file->GetLength());
+  if (image_file_size < sizeof(ImageHeader) + image_header.GetDataSize()) {
+    *error_msg = StringPrintf("Image file truncated: %" PRIu64 " vs. %" PRIu64 ".",
+                              image_file_size,
+                              image_header.GetDataSize());
     return nullptr;
   }
 
@@ -700,7 +698,11 @@
   }
 
   const auto& bitmap_section = image_header.GetImageSection(ImageHeader::kSectionImageBitmap);
-  auto end_of_bitmap = static_cast<size_t>(bitmap_section.End());
+  // The location we want to map from is the first aligned page after the end of the stored
+  // (possibly compressed) data.
+  const size_t image_bitmap_offset = RoundUp(sizeof(image_header) + image_header.GetDataSize(),
+                                             kPageSize);
+  const size_t end_of_bitmap = image_bitmap_offset + bitmap_section.Size();
   if (end_of_bitmap != image_file_size) {
     *error_msg = StringPrintf(
         "Image file size does not equal end of bitmap: size=%" PRIu64 " vs. %zu.", image_file_size,
@@ -709,16 +711,60 @@
   }
 
   // Note: The image header is part of the image due to mmap page alignment required of offset.
-  std::unique_ptr<MemMap> map(MemMap::MapFileAtAddress(image_header.GetImageBegin(),
-                                                       image_header.GetImageSize(),
-                                                       PROT_READ | PROT_WRITE,
+  std::unique_ptr<MemMap> map;
+  if (image_header.GetStorageMode() == ImageHeader::kStorageModeUncompressed) {
+    map.reset(MemMap::MapFileAtAddress(image_header.GetImageBegin(),
+                                       image_header.GetImageSize(),
+                                       PROT_READ | PROT_WRITE,
+                                       MAP_PRIVATE,
+                                       file->Fd(),
+                                       0,
+                                       /*low_4gb*/false,
+                                       /*reuse*/false,
+                                       image_filename,
+                                       error_msg));
+  } else {
+    // Reserve output and decompress into it.
+    map.reset(MemMap::MapAnonymous(image_location,
+                                   image_header.GetImageBegin(),
+                                   image_header.GetImageSize(),
+                                   PROT_READ | PROT_WRITE,
+                                   /*low_4gb*/false,
+                                   /*reuse*/false,
+                                   error_msg));
+    if (map != nullptr) {
+      const size_t stored_size = image_header.GetDataSize();
+      const size_t write_offset = sizeof(image_header);  // Skip the header.
+      std::unique_ptr<MemMap> temp_map(MemMap::MapFile(sizeof(ImageHeader) + stored_size,
+                                                       PROT_READ,
                                                        MAP_PRIVATE,
                                                        file->Fd(),
-                                                       0,
+                                                       /*offset*/0,
                                                        /*low_4gb*/false,
-                                                       /*reuse*/false,
                                                        image_filename,
                                                        error_msg));
+      if (temp_map == nullptr) {
+        DCHECK(!error_msg->empty());
+        return nullptr;
+      }
+      memcpy(map->Begin(), &image_header, sizeof(image_header));
+      const uint64_t start = NanoTime();
+      const size_t decompressed_size = LZ4_decompress_safe(
+          reinterpret_cast<char*>(temp_map->Begin()) + sizeof(ImageHeader),
+          reinterpret_cast<char*>(map->Begin()) + write_offset,
+          stored_size,
+          map->Size());
+      // TODO: VLOG(image)
+      VLOG(class_linker) << "Decompressing image took " << PrettyDuration(NanoTime() - start);
+      if (decompressed_size + sizeof(ImageHeader) != image_header.GetImageSize()) {
+        *error_msg = StringPrintf("Decompressed size does not match expected image size %zu vs %zu",
+                                  decompressed_size + sizeof(ImageHeader),
+                                  image_header.GetImageSize());
+        return nullptr;
+      }
+    }
+  }
+
   if (map == nullptr) {
     DCHECK(!error_msg->empty());
     return nullptr;
@@ -726,16 +772,16 @@
   CHECK_EQ(image_header.GetImageBegin(), map->Begin());
   DCHECK_EQ(0, memcmp(&image_header, map->Begin(), sizeof(ImageHeader)));
 
-  std::unique_ptr<MemMap> image_map(MemMap::MapFileAtAddress(nullptr,
-                                                             bitmap_section.Size(),
-                                                             PROT_READ, MAP_PRIVATE,
-                                                             file->Fd(),
-                                                             bitmap_section.Offset(),
-                                                             /*low_4gb*/false,
-                                                             /*reuse*/false,
-                                                             image_filename,
-                                                             error_msg));
-  if (image_map.get() == nullptr) {
+  std::unique_ptr<MemMap> image_bitmap_map(MemMap::MapFileAtAddress(nullptr,
+                                                                    bitmap_section.Size(),
+                                                                    PROT_READ, MAP_PRIVATE,
+                                                                    file->Fd(),
+                                                                    image_bitmap_offset,
+                                                                    /*low_4gb*/false,
+                                                                    /*reuse*/false,
+                                                                    image_filename,
+                                                                    error_msg));
+  if (image_bitmap_map == nullptr) {
     *error_msg = StringPrintf("Failed to map image bitmap: %s", error_msg->c_str());
     return nullptr;
   }
@@ -744,9 +790,11 @@
                                        bitmap_index));
   std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap(
       accounting::ContinuousSpaceBitmap::CreateFromMemMap(
-          bitmap_name, image_map.release(), reinterpret_cast<uint8_t*>(map->Begin()),
+          bitmap_name,
+          image_bitmap_map.release(),
+          reinterpret_cast<uint8_t*>(map->Begin()),
           accounting::ContinuousSpaceBitmap::ComputeHeapSize(bitmap_section.Size())));
-  if (bitmap.get() == nullptr) {
+  if (bitmap == nullptr) {
     *error_msg = StringPrintf("Could not create bitmap '%s'", bitmap_name.c_str());
     return nullptr;
   }
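
A minimal, self-contained sketch of the LZ4 round trip the decompression path above depends on, assuming liblz4 is available (link with -llz4); the buffer contents and sizes are illustrative, not taken from a real boot image:

    #include <lz4.h>

    #include <cstdio>
    #include <cstring>
    #include <vector>

    int main() {
      const char src[] = "boot image data, boot image data, boot image data";
      const int src_size = static_cast<int>(sizeof(src));
      // Worst-case bound for the compressed output.
      std::vector<char> compressed(LZ4_compressBound(src_size));
      const int stored_size = LZ4_compress_default(
          src, compressed.data(), src_size, static_cast<int>(compressed.size()));
      if (stored_size <= 0) return 1;
      // Mirrors the load path: decompress into a buffer of the known
      // uncompressed size, then check the decompressed size matches.
      std::vector<char> decompressed(src_size);
      const int decompressed_size = LZ4_decompress_safe(
          compressed.data(), decompressed.data(), stored_size,
          static_cast<int>(decompressed.size()));
      if (decompressed_size != src_size) return 1;  // Truncated or corrupt.
      std::printf("%d -> %d -> %d bytes\n", src_size, stored_size, decompressed_size);
      return std::memcmp(src, decompressed.data(), src_size) == 0 ? 0 : 1;
    }
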
diff --git a/runtime/handle.h b/runtime/handle.h
index f939ec5..5b3bb60 100644
--- a/runtime/handle.h
+++ b/runtime/handle.h
@@ -64,7 +64,7 @@
 
   ALWAYS_INLINE jobject ToJObject() const SHARED_REQUIRES(Locks::mutator_lock_) {
     if (UNLIKELY(reference_->AsMirrorPtr() == nullptr)) {
-      // Special case so that we work with NullHandles.
+      // Special case so that we work with null handles.
       return nullptr;
     }
     return reinterpret_cast<jobject>(reference_);
@@ -147,12 +147,12 @@
   template<size_t kNumReferences> friend class StackHandleScope;
 };
 
-// A special case of Handle that only holds references to null.
+// A special case of Handle that only holds references to null. Invalid if it goes out of
+// scope. Example: Handle<T> h = ScopedNullHandle<T>() will leave h undefined.
 template<class T>
-class NullHandle : public Handle<T> {
+class ScopedNullHandle : public Handle<T> {
  public:
-  NullHandle() : Handle<T>(&null_ref_) {
-  }
+  ScopedNullHandle() : Handle<T>(&null_ref_) {}
 
  private:
   StackReference<mirror::Object> null_ref_;
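
To make the lifetime hazard named in the comment concrete, here is a simplified, self-contained analogue (not ART's real Handle classes): the base class keeps a pointer into storage owned by the derived object, so copying the base out of a temporary leaves that pointer dangling:

    #include <cstdio>

    struct Ref { void* obj; };

    template <class T>
    class Handle {
     public:
      explicit Handle(Ref* ref) : ref_(ref) {}
      bool IsNull() const { return ref_->obj == nullptr; }
     protected:
      Ref* ref_;  // Points into storage the derived class may own.
    };

    template <class T>
    class ScopedNullHandle : public Handle<T> {
     public:
      ScopedNullHandle() : Handle<T>(&null_ref_) {}
     private:
      Ref null_ref_{nullptr};  // Dies with this object.
    };

    void Use(Handle<int> h) {
      std::printf("null? %d\n", h.IsNull());
    }

    int main() {
      Use(ScopedNullHandle<int>());  // OK: the temporary outlives the call.
      Handle<int> kept = ScopedNullHandle<int>();  // Bug: ref_ now dangles.
      (void)kept;  // Reading through `kept` would touch the dead temporary.
      return 0;
    }
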
diff --git a/runtime/image.cc b/runtime/image.cc
index 1bc19ff..f8f930b 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '2', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '4', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
@@ -36,7 +36,9 @@
                          uint32_t oat_data_end,
                          uint32_t oat_file_end,
                          uint32_t pointer_size,
-                         bool compile_pic)
+                         bool compile_pic,
+                         StorageMode storage_mode,
+                         size_t data_size)
   : image_begin_(image_begin),
     image_size_(image_size),
     oat_checksum_(oat_checksum),
@@ -47,7 +49,9 @@
     patch_delta_(0),
     image_roots_(image_roots),
     pointer_size_(pointer_size),
-    compile_pic_(compile_pic) {
+    compile_pic_(compile_pic),
+    storage_mode_(storage_mode),
+    data_size_(data_size) {
   CHECK_EQ(image_begin, RoundUp(image_begin, kPageSize));
   CHECK_EQ(oat_file_begin, RoundUp(oat_file_begin, kPageSize));
   CHECK_EQ(oat_data_begin, RoundUp(oat_data_begin, kPageSize));
@@ -122,7 +126,7 @@
   mirror::ObjectArray<mirror::Object>* image_roots =
       reinterpret_cast<mirror::ObjectArray<mirror::Object>*>(image_roots_);
   mirror::ObjectArray<mirror::Object>* result =
-      ReadBarrier::BarrierForRoot<mirror::ObjectArray<mirror::Object>, kWithReadBarrier, true>(
+      ReadBarrier::BarrierForRoot<mirror::ObjectArray<mirror::Object>, kWithReadBarrier>(
           &image_roots);
   DCHECK_EQ(image_roots, result);
   return result;
diff --git a/runtime/image.h b/runtime/image.h
index 555cf5d..7418f66 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -78,10 +78,27 @@
 // header of image files written by ImageWriter, read and validated by Space.
 class PACKED(4) ImageHeader {
  public:
+  enum StorageMode : uint32_t {
+    kStorageModeUncompressed,
+    kStorageModeLZ4,
+    kStorageModeCount,  // Number of elements in enum.
+  };
+  static constexpr StorageMode kDefaultStorageMode = kStorageModeUncompressed;
+
   ImageHeader()
-      : image_begin_(0U), image_size_(0U), oat_checksum_(0U), oat_file_begin_(0U),
-        oat_data_begin_(0U), oat_data_end_(0U), oat_file_end_(0U), patch_delta_(0),
-        image_roots_(0U), pointer_size_(0U), compile_pic_(0) {}
+      : image_begin_(0U),
+        image_size_(0U),
+        oat_checksum_(0U),
+        oat_file_begin_(0U),
+        oat_data_begin_(0U),
+        oat_data_end_(0U),
+        oat_file_end_(0U),
+        patch_delta_(0),
+        image_roots_(0U),
+        pointer_size_(0U),
+        compile_pic_(0),
+        storage_mode_(kDefaultStorageMode),
+        data_size_(0) {}
 
   ImageHeader(uint32_t image_begin,
               uint32_t image_size,
@@ -93,7 +110,9 @@
               uint32_t oat_data_end,
               uint32_t oat_file_end,
               uint32_t pointer_size,
-              bool compile_pic);
+              bool compile_pic,
+              StorageMode storage_mode,
+              size_t data_size);
 
   bool IsValid() const;
   const char* GetMagic() const;
@@ -170,6 +189,7 @@
     kSectionArtMethods,
     kSectionDexCacheArrays,
     kSectionInternedStrings,
+    kSectionClassTable,
     kSectionImageBitmap,
     kSectionCount,  // Number of elements in enum.
   };
@@ -193,6 +213,14 @@
     return compile_pic_ != 0;
   }
 
+  StorageMode GetStorageMode() const {
+    return storage_mode_;
+  }
+
+  uint64_t GetDataSize() const {
+    return data_size_;
+  }
+
  private:
   static const uint8_t kImageMagic[4];
   static const uint8_t kImageVersion[4];
@@ -234,12 +262,19 @@
   // Boolean (0 or 1) to denote if the image was compiled with --compile-pic option
   const uint32_t compile_pic_;
 
-  // Image sections
+  // Image section sizes/offsets correspond to the uncompressed form.
   ImageSection sections_[kSectionCount];
 
   // Image methods.
   uint64_t image_methods_[kImageMethodsCount];
 
+  // Storage mode for the image; the image may be compressed.
+  StorageMode storage_mode_;
+
+  // Data size for the image data excluding the bitmap and the header. For compressed images, this
+  // is the compressed size in the file.
+  uint32_t data_size_;
+
   friend class ImageWriter;
 };
 
@@ -247,6 +282,7 @@
 std::ostream& operator<<(std::ostream& os, const ImageHeader::ImageRoot& policy);
 std::ostream& operator<<(std::ostream& os, const ImageHeader::ImageSections& section);
 std::ostream& operator<<(std::ostream& os, const ImageSection& section);
+std::ostream& operator<<(std::ostream& os, const ImageHeader::StorageMode& mode);
 
 }  // namespace art
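
The file layout implied by the new header fields can be checked with a small sketch: the bitmap is stored at the first page boundary after the header plus the (possibly compressed) data. kPageSize and the sizes below are made-up example values, and RoundUp stands in for ART's helper:

    #include <cstdint>
    #include <cstdio>

    constexpr uint64_t kPageSize = 4096;  // Example value.

    constexpr uint64_t RoundUp(uint64_t x, uint64_t n) {
      return (x + n - 1) / n * n;
    }

    int main() {
      const uint64_t header_size = 512;    // Stand-in for sizeof(ImageHeader).
      const uint64_t data_size = 1000000;  // Compressed or uncompressed payload.
      const uint64_t bitmap_size = 8192;
      // Bitmap starts at the first page boundary after the stored data.
      const uint64_t bitmap_offset = RoundUp(header_size + data_size, kPageSize);
      const uint64_t expected_file_size = bitmap_offset + bitmap_size;
      std::printf("bitmap offset %llu, expected file size %llu\n",
                  static_cast<unsigned long long>(bitmap_offset),
                  static_cast<unsigned long long>(expected_file_size));
      return 0;
    }
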
 
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index d13526b..2d0ae63 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -344,8 +344,11 @@
     segment_state_.all = new_state;
   }
 
-  static Offset SegmentStateOffset() {
-    return Offset(OFFSETOF_MEMBER(IndirectReferenceTable, segment_state_));
+  static Offset SegmentStateOffset(size_t pointer_size ATTRIBUTE_UNUSED) {
+    // Note: Currently segment_state_ is at offset 0. We're testing the expected value in
+    //       jni_internal_test to make sure it stays correct. It is not OFFSETOF_MEMBER, as that
+    //       is not pointer-size-safe.
+    return Offset(0);
   }
 
   // Release pages past the end of the table that may have previously held references.
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index bc2c197..9f61449 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -93,11 +93,8 @@
     // We need the class to be resolved to install/uninstall stubs. Otherwise its methods
     // could not be initialized or linked with regards to class inheritance.
   } else {
-    for (size_t i = 0, e = klass->NumDirectMethods(); i < e; i++) {
-      InstallStubsForMethod(klass->GetDirectMethod(i, sizeof(void*)));
-    }
-    for (size_t i = 0, e = klass->NumVirtualMethods(); i < e; i++) {
-      InstallStubsForMethod(klass->GetVirtualMethod(i, sizeof(void*)));
+    for (ArtMethod& method : klass->GetMethods(sizeof(void*))) {
+      InstallStubsForMethod(&method);
     }
   }
 }
@@ -1062,7 +1059,9 @@
                                 PrettyMethod(method).c_str(),
                                 return_value.GetJ()) << *self;
     }
-    self->PushDeoptimizationContext(return_value, return_shorty == 'L',
+    self->PushDeoptimizationContext(return_value,
+                                    return_shorty == 'L',
+                                    false /* from_code */,
                                     nullptr /* no pending exception */);
     return GetTwoWordSuccessValue(*return_pc,
                                   reinterpret_cast<uintptr_t>(GetQuickDeoptimizationEntryPoint()));
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index d686f74..871fad7 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -397,7 +397,10 @@
   self->PopShadowFrame();
 }
 
-void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame, JValue* ret_val)
+void EnterInterpreterFromDeoptimize(Thread* self,
+                                    ShadowFrame* shadow_frame,
+                                    bool from_code,
+                                    JValue* ret_val)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue value;
   // Set value to last known result in case the shadow frame chain is empty.
@@ -408,7 +411,7 @@
     self->SetTopOfShadowStack(shadow_frame);
     const DexFile::CodeItem* code_item = shadow_frame->GetMethod()->GetCodeItem();
     const uint32_t dex_pc = shadow_frame->GetDexPC();
-    uint32_t new_dex_pc;
+    uint32_t new_dex_pc = dex_pc;
     if (UNLIKELY(self->IsExceptionPending())) {
       // If we deoptimize from the QuickExceptionHandler, we already reported the exception to
       // the instrumentation. To prevent from reporting it a second time, we simply pass a
@@ -419,11 +422,16 @@
                                                                     instrumentation);
       new_dex_pc = found_dex_pc;  // the dex pc of a matching catch handler
                                   // or DexFile::kDexNoIndex if there is none.
-    } else {
-      const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
-      // For an invoke, use the dex pc of the next instruction.
+    } else if (!from_code) {
+      // For the debugger and full deoptimization stack, we must go past the invoke
+      // instruction, as it already executed.
       // TODO: should be tested more once b/17586779 is fixed.
-      new_dex_pc = dex_pc + (instr->IsInvoke() ? instr->SizeInCodeUnits() : 0);
+      const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
+      DCHECK(instr->IsInvoke());
+      new_dex_pc = dex_pc + instr->SizeInCodeUnits();
+    } else {
+      // Nothing to do, the dex_pc is the one at which the code requested
+      // the deoptimization.
     }
     if (new_dex_pc != DexFile::kDexNoIndex) {
       shadow_frame->SetDexPC(new_dex_pc);
@@ -432,6 +440,8 @@
     ShadowFrame* old_frame = shadow_frame;
     shadow_frame = shadow_frame->GetLink();
     ShadowFrame::DeleteDeoptimizedFrame(old_frame);
+    // Any following deoptimized shadow frames must go past the invoke instruction.
+    from_code = false;
     first = false;
   }
   ret_val->SetJ(value.GetJ());
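
A tiny stand-in for the resume-pc decision above: deoptimization requested from compiled code resumes at the same dex pc, while debugger or full-frame deoptimization resumes after the invoke that has already executed. The instruction size used here is hypothetical:

    #include <cstdint>
    #include <cstdio>

    // from_code == true: resume at the dex pc that requested deoptimization.
    // from_code == false: resume after the invoke, which already executed.
    uint32_t NextDexPc(uint32_t dex_pc, uint32_t invoke_size_in_code_units, bool from_code) {
      return from_code ? dex_pc : dex_pc + invoke_size_in_code_units;
    }

    int main() {
      std::printf("%u\n", NextDexPc(10, 3, /*from_code=*/ true));   // 10
      std::printf("%u\n", NextDexPc(10, 3, /*from_code=*/ false));  // 13
      return 0;
    }
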
diff --git a/runtime/interpreter/interpreter.h b/runtime/interpreter/interpreter.h
index b21ea84..8e7f3da 100644
--- a/runtime/interpreter/interpreter.h
+++ b/runtime/interpreter/interpreter.h
@@ -37,7 +37,8 @@
                                        mirror::Object* receiver, uint32_t* args, JValue* result)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-extern void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame,
+// 'from_code' denotes whether the deoptimization was explicitly triggered by compiled code.
+extern void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame, bool from_code,
                                            JValue* ret_val)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index bf95a0e..c9831e6 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -66,6 +66,11 @@
     }                                                                                           \
   } while (false)
 
+#define BACKWARD_BRANCH_INSTRUMENTATION(offset) \
+  do { \
+    instrumentation->BackwardBranch(self, shadow_frame.GetMethod(), offset); \
+  } while (false)
+
 static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
   DCHECK(inst->IsExperimental());
   return Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas);
@@ -542,6 +547,7 @@
         PREAMBLE();
         int8_t offset = inst->VRegA_10t(inst_data);
         if (IsBackwardBranch(offset)) {
+          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -551,6 +557,7 @@
         PREAMBLE();
         int16_t offset = inst->VRegA_20t();
         if (IsBackwardBranch(offset)) {
+          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -560,6 +567,7 @@
         PREAMBLE();
         int32_t offset = inst->VRegA_30t();
         if (IsBackwardBranch(offset)) {
+          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -569,6 +577,7 @@
         PREAMBLE();
         int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
         if (IsBackwardBranch(offset)) {
+          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -578,6 +587,7 @@
         PREAMBLE();
         int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
         if (IsBackwardBranch(offset)) {
+          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -681,6 +691,7 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -695,6 +706,7 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -709,6 +721,7 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -723,6 +736,7 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -737,6 +751,7 @@
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -751,6 +766,7 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -764,6 +780,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -777,6 +794,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -790,6 +808,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -803,6 +822,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -816,6 +836,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -829,6 +850,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
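
The new hook is the basis for hotness detection. A simplified, self-contained model (not ART's instrumentation API) that counts back-edges per method and flags a method as hot once a threshold is crossed:

    #include <cstdint>
    #include <cstdio>
    #include <unordered_map>

    struct HotnessCounter {
      static constexpr uint32_t kCompileThreshold = 10000;  // Made-up value.
      std::unordered_map<const void*, uint32_t> counts;

      // Called on every taken branch; only back-edges (negative offsets) count.
      void BackwardBranch(const void* method, int32_t offset) {
        if (offset < 0 && ++counts[method] == kCompileThreshold) {
          std::printf("method %p is hot, request compilation\n", method);
        }
      }
    };

    int main() {
      HotnessCounter counter;
      int dummy_method = 0;
      for (int i = 0; i < 10000; ++i) {
        counter.BackwardBranch(&dummy_method, -4);  // A loop back-edge.
      }
      return 0;
    }
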
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 92b6e4f..60ad0cb 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -128,8 +128,13 @@
   }
   StackHandleScope<1> hs(self);
   Handle<mirror::String> h_class_name(hs.NewHandle(class_name));
-  UnstartedRuntimeFindClass(self, h_class_name, NullHandle<mirror::ClassLoader>(), result,
-                            "Class.forName", true, false);
+  UnstartedRuntimeFindClass(self,
+                            h_class_name,
+                            ScopedNullHandle<mirror::ClassLoader>(),
+                            result,
+                            "Class.forName",
+                            true,
+                            false);
   CheckExceptionGenerateClassNotFound(self);
 }
 
@@ -704,7 +709,7 @@
   Handle<mirror::Class> h_class(hs.NewHandle(
       runtime->GetClassLinker()->FindClass(self,
                                            "Ljava/io/StringReader;",
-                                           NullHandle<mirror::ClassLoader>())));
+                                           ScopedNullHandle<mirror::ClassLoader>())));
   if (h_class.Get() == nullptr) {
     AbortTransactionOrFail(self, "Could not find StringReader class");
     return;
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index 7cc05f7..5c44193 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -17,6 +17,7 @@
 #include "jni_internal.h"
 
 #define ATRACE_TAG ATRACE_TAG_DALVIK
+
 #include <cutils/trace.h>
 #include <dlfcn.h>
 
@@ -31,6 +32,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "nativebridge/native_bridge.h"
+#include "nativeloader/native_loader.h"
 #include "java_vm_ext.h"
 #include "parsed_options.h"
 #include "runtime-inl.h"
@@ -715,7 +717,8 @@
 }
 
 bool JavaVMExt::LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject class_loader,
-                                  std::string* error_msg) {
+                                  bool is_shared_namespace, jstring library_path,
+                                  jstring permitted_path, std::string* error_msg) {
   error_msg->clear();
 
   // See if we've already loaded this library.  If we have, and the class loader
@@ -774,7 +777,9 @@
 
   Locks::mutator_lock_->AssertNotHeld(self);
   const char* path_str = path.empty() ? nullptr : path.c_str();
-  void* handle = dlopen(path_str, RTLD_NOW);
+  void* handle = android::OpenNativeLibrary(env, runtime_->GetTargetSdkVersion(), path_str,
+                                            class_loader, is_shared_namespace, library_path,
+                                            permitted_path);
   bool needs_native_bridge = false;
   if (handle == nullptr) {
     if (android::NativeBridgeIsSupported(path_str)) {
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index 618f6fa..8cae1e5 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -82,10 +82,11 @@
   /**
    * Loads the given shared library. 'path' is an absolute pathname.
    *
-   * Returns 'true' on success. On failure, sets 'detail' to a
+   * Returns 'true' on success. On failure, sets 'error_msg' to a
    * human-readable description of the error.
    */
-  bool LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject javaLoader,
+  bool LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject class_loader,
+                         bool is_shared_namespace, jstring library_path, jstring permitted_path,
                          std::string* error_msg);
 
   // Unload native libraries with cleared class loaders.
diff --git a/runtime/jdwp/jdwp_expand_buf.cc b/runtime/jdwp/jdwp_expand_buf.cc
index e492d7e..961dd36 100644
--- a/runtime/jdwp/jdwp_expand_buf.cc
+++ b/runtime/jdwp/jdwp_expand_buf.cc
@@ -164,7 +164,7 @@
  * have stored null bytes in a multi-byte encoding).
  */
 void expandBufAddUtf8String(ExpandBuf* pBuf, const char* s) {
-  int strLen = strlen(s);
+  int strLen = (s != nullptr ? strlen(s) : 0);
   ensureSpace(pBuf, sizeof(uint32_t) + strLen);
   SetUtf8String(pBuf->storage + pBuf->curLen, s, strLen);
   pBuf->curLen += sizeof(uint32_t) + strLen;
diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc
new file mode 100644
index 0000000..3c2898b
--- /dev/null
+++ b/runtime/jit/debugger_interface.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "debugger_interface.h"
+
+namespace art {
+
+// -------------------------------------------------------------------
+// Binary GDB JIT Interface as described in
+//   http://sourceware.org/gdb/onlinedocs/gdb/Declarations.html
+// -------------------------------------------------------------------
+extern "C" {
+  typedef enum {
+    JIT_NOACTION = 0,
+    JIT_REGISTER_FN,
+    JIT_UNREGISTER_FN
+  } JITAction;
+
+  struct JITCodeEntry {
+    JITCodeEntry* next_;
+    JITCodeEntry* prev_;
+    const uint8_t *symfile_addr_;
+    uint64_t symfile_size_;
+  };
+
+  struct JITDescriptor {
+    uint32_t version_;
+    uint32_t action_flag_;
+    JITCodeEntry* relevant_entry_;
+    JITCodeEntry* first_entry_;
+  };
+
+  // GDB will place a breakpoint in this function.
+  // To prevent GCC from inlining or removing it we mark it noinline and
+  // put an inline assembler statement inside.
+  void __attribute__((noinline)) __jit_debug_register_code();
+  void __attribute__((noinline)) __jit_debug_register_code() {
+    __asm__("");
+  }
+
+  // GDB will inspect the contents of this descriptor.
+  // Static initialization is necessary to prevent GDB from seeing an
+  // uninitialized descriptor.
+  JITDescriptor __jit_debug_descriptor = { 1, JIT_NOACTION, nullptr, nullptr };
+}
+
+JITCodeEntry* CreateJITCodeEntry(const uint8_t *symfile_addr, uintptr_t symfile_size) {
+  JITCodeEntry* entry = new JITCodeEntry;
+  entry->symfile_addr_ = symfile_addr;
+  entry->symfile_size_ = symfile_size;
+  entry->prev_ = nullptr;
+
+  // TODO: Do we need a lock here?
+  entry->next_ = __jit_debug_descriptor.first_entry_;
+  if (entry->next_ != nullptr) {
+    entry->next_->prev_ = entry;
+  }
+  __jit_debug_descriptor.first_entry_ = entry;
+  __jit_debug_descriptor.relevant_entry_ = entry;
+
+  __jit_debug_descriptor.action_flag_ = JIT_REGISTER_FN;
+  __jit_debug_register_code();
+  return entry;
+}
+
+void DeleteJITCodeEntry(JITCodeEntry* entry) {
+  // TODO: Do we need a lock here?
+  if (entry->prev_ != nullptr) {
+    entry->prev_->next_ = entry->next_;
+  } else {
+    __jit_debug_descriptor.first_entry_ = entry->next_;
+  }
+
+  if (entry->next_ != nullptr) {
+    entry->next_->prev_ = entry->prev_;
+  }
+
+  __jit_debug_descriptor.relevant_entry_ = entry;
+  __jit_debug_descriptor.action_flag_ = JIT_UNREGISTER_FN;
+  __jit_debug_register_code();
+  delete entry;
+}
+
+}  // namespace art
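
A hedged usage sketch of the two entry points, assuming it is compiled inside the ART tree so the header resolves. The byte buffer is a placeholder; for gdb to actually resolve symbols it would have to be a complete in-memory ELF file describing the JITed code:

    #include <cstdint>
    #include <cstdio>

    #include "jit/debugger_interface.h"  // CreateJITCodeEntry / DeleteJITCodeEntry.

    int main() {
      // Placeholder bytes; a real entry would point at an in-memory ELF file.
      static const uint8_t fake_symfile[] = {0x7f, 'E', 'L', 'F'};
      art::JITCodeEntry* entry =
          art::CreateJITCodeEntry(fake_symfile, sizeof(fake_symfile));
      // An attached gdb observes the JIT_REGISTER_FN action at this point.
      std::puts("registered");
      art::DeleteJITCodeEntry(entry);  // JIT_UNREGISTER_FN.
      return 0;
    }
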
diff --git a/runtime/jit/debugger_interface.h b/runtime/jit/debugger_interface.h
new file mode 100644
index 0000000..a784ef5
--- /dev/null
+++ b/runtime/jit/debugger_interface.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_DEBUGGER_INTERFACE_H_
+#define ART_RUNTIME_JIT_DEBUGGER_INTERFACE_H_
+
+#include <inttypes.h>
+
+namespace art {
+
+extern "C" {
+  struct JITCodeEntry;
+}
+
+// Notify native debugger about new JITed code by passing in-memory ELF.
+JITCodeEntry* CreateJITCodeEntry(const uint8_t *symfile_addr, uintptr_t symfile_size);
+
+// Notify native debugger that JITed code has been removed.
+void DeleteJITCodeEntry(JITCodeEntry* entry);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_DEBUGGER_INTERFACE_H_
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index ecbf13c..b2fc74d 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -24,6 +24,8 @@
 #include "interpreter/interpreter.h"
 #include "jit_code_cache.h"
 #include "jit_instrumentation.h"
+#include "oat_file_manager.h"
+#include "offline_profiling_info.h"
 #include "runtime.h"
 #include "runtime_options.h"
 #include "utils.h"
@@ -44,14 +46,16 @@
       options.GetOrDefault(RuntimeArgumentMap::JITWarmupThreshold);
   jit_options->dump_info_on_shutdown_ =
       options.Exists(RuntimeArgumentMap::DumpJITInfoOnShutdown);
+  jit_options->save_profiling_info_ =
+      options.GetOrDefault(RuntimeArgumentMap::JITSaveProfilingInfo);
   return jit_options;
 }
 
 void Jit::DumpInfo(std::ostream& os) {
-  os << "Code cache size=" << PrettySize(code_cache_->CodeCacheSize())
-     << " data cache size=" << PrettySize(code_cache_->DataCacheSize())
-     << " number of compiled code=" << code_cache_->NumberOfCompiledCode()
-     << "\n";
+  os << "JIT code cache size=" << PrettySize(code_cache_->CodeCacheSize()) << "\n"
+     << "JIT data cache size=" << PrettySize(code_cache_->DataCacheSize()) << "\n"
+     << "JIT current capacity=" << PrettySize(code_cache_->GetCurrentCapacity()) << "\n"
+     << "JIT number of compiled code=" << code_cache_->NumberOfCompiledCode() << "\n";
   cumulative_timings_.Dump(os);
 }
 
@@ -76,6 +80,10 @@
   if (jit->GetCodeCache() == nullptr) {
     return nullptr;
   }
+  jit->offline_profile_info_.reset(nullptr);
+  if (options->GetSaveProfilingInfo()) {
+    jit->offline_profile_info_.reset(new OfflineProfilingInfo());
+  }
   LOG(INFO) << "JIT created with initial_capacity="
       << PrettySize(options->GetCodeCacheInitialCapacity())
       << ", max_capacity=" << PrettySize(options->GetCodeCacheMaxCapacity())
@@ -134,11 +142,24 @@
 
 bool Jit::CompileMethod(ArtMethod* method, Thread* self) {
   DCHECK(!method->IsRuntimeMethod());
+  // Don't compile the method if it has breakpoints.
   if (Dbg::IsDebuggerActive() && Dbg::MethodHasAnyBreakpoints(method)) {
     VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to breakpoint";
     return false;
   }
-  return jit_compile_method_(jit_compiler_handle_, method, self);
+
+  // Don't compile the method if we are supposed to be deoptimized.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (instrumentation->AreAllMethodsDeoptimized() || instrumentation->IsDeoptimized(method)) {
+    return false;
+  }
+
+  if (!code_cache_->NotifyCompilationOf(method, self)) {
+    return false;
+  }
+  bool success = jit_compile_method_(jit_compiler_handle_, method, self);
+  code_cache_->DoneCompiling(method, self);
+  return success;
 }
 
 void Jit::CreateThreadPool() {
@@ -152,6 +173,24 @@
   }
 }
 
+void Jit::SaveProfilingInfo(const std::string& filename) {
+  if (offline_profile_info_ == nullptr) {
+    return;
+  }
+  uint64_t last_update_ns = code_cache_->GetLastUpdateTimeNs();
+  if (offline_profile_info_->NeedsSaving(last_update_ns)) {
+    VLOG(profiler) << "Initiate save profiling information to: " << filename;
+    std::set<ArtMethod*> methods;
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      code_cache_->GetCompiledArtMethods(offline_profile_info_->GetTrackedDexLocations(), methods);
+    }
+    offline_profile_info_->SaveProfilingInfo(filename, last_update_ns, methods);
+  } else {
+    VLOG(profiler) << "No need to save profiling information to: " << filename;
+  }
+}
+
 Jit::~Jit() {
   if (dump_info_on_shutdown_) {
     DumpInfo(LOG(INFO));
@@ -171,5 +210,12 @@
       new jit::JitInstrumentationCache(compile_threshold, warmup_threshold));
 }
 
+void Jit::SetDexLocationsForProfiling(const std::vector<std::string>& dex_base_locations) {
+  if (offline_profile_info_ == nullptr) {
+    return;
+  }
+  offline_profile_info_->SetTrackedDexLocations(dex_base_locations);
+}
+
 }  // namespace jit
 }  // namespace art
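
The throttle behind NeedsSaving() reduces to a timestamp comparison: skip the save unless the code cache changed long enough after the last successful save. A standalone sketch with illustrative constants (the names here are not ART's):

    #include <cstdint>
    #include <cstdio>

    constexpr uint64_t kMillisToNanos = 1000000;
    constexpr uint64_t kMinTimeBetweenSavesNs = 500 * kMillisToNanos;

    bool NeedsSaving(uint64_t last_update_ns, uint64_t last_save_ns) {
      return last_update_ns - last_save_ns > kMinTimeBetweenSavesNs;
    }

    int main() {
      std::printf("%d\n", NeedsSaving(900 * kMillisToNanos, 100 * kMillisToNanos));  // 1
      std::printf("%d\n", NeedsSaving(400 * kMillisToNanos, 100 * kMillisToNanos));  // 0
      return 0;
    }
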
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index fc76549..7a2db31 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -26,6 +26,7 @@
 #include "gc_root.h"
 #include "jni.h"
 #include "object_callbacks.h"
+#include "offline_profiling_info.h"
 #include "thread_pool.h"
 
 namespace art {
@@ -71,6 +72,13 @@
     return instrumentation_cache_.get();
   }
 
+  void SetDexLocationsForProfiling(const std::vector<std::string>& dex_locations);
+  void SaveProfilingInfo(const std::string& filename);
+
+  void DumpForSigQuit(std::ostream& os) {
+    DumpInfo(os);
+  }
+
  private:
   Jit();
   bool LoadCompiler(std::string* error_msg);
@@ -90,6 +98,7 @@
   std::unique_ptr<jit::JitCodeCache> code_cache_;
   CompilerCallbacks* compiler_callbacks_;  // Owned by the jit compiler.
 
+  std::unique_ptr<OfflineProfilingInfo> offline_profile_info_;
   DISALLOW_COPY_AND_ASSIGN(Jit);
 };
 
@@ -111,12 +120,18 @@
   bool DumpJitInfoOnShutdown() const {
     return dump_info_on_shutdown_;
   }
+  bool GetSaveProfilingInfo() const {
+    return save_profiling_info_;
+  }
   bool UseJIT() const {
     return use_jit_;
   }
   void SetUseJIT(bool b) {
     use_jit_ = b;
   }
+  void SetSaveProfilingInfo(bool b) {
+    save_profiling_info_ = b;
+  }
 
  private:
   bool use_jit_;
@@ -125,13 +140,15 @@
   size_t compile_threshold_;
   size_t warmup_threshold_;
   bool dump_info_on_shutdown_;
+  bool save_profiling_info_;
 
   JitOptions()
       : use_jit_(false),
         code_cache_initial_capacity_(0),
         code_cache_max_capacity_(0),
         compile_threshold_(0),
-        dump_info_on_shutdown_(false) { }
+        dump_info_on_shutdown_(false),
+        save_profiling_info_(false) { }
 
   DISALLOW_COPY_AND_ASSIGN(JitOptions);
 };
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index da79109..033a8f0 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -19,6 +19,8 @@
 #include <sstream>
 
 #include "art_method-inl.h"
+#include "base/stl_util.h"
+#include "base/time_utils.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/bitmap-inl.h"
 #include "jit/profiling_info.h"
@@ -109,7 +111,8 @@
       current_capacity_(initial_code_capacity + initial_data_capacity),
       code_end_(initial_code_capacity),
       data_end_(initial_data_capacity),
-      has_done_one_collection_(false) {
+      has_done_one_collection_(false),
+      last_update_time_ns_(0) {
 
   code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/);
   data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_end_, false /*locked*/);
@@ -314,6 +317,7 @@
       // code.
       GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr));
     }
+    last_update_time_ns_ = NanoTime();
     VLOG(jit)
         << "JIT added "
         << PrettyMethod(method) << "@" << method
@@ -533,7 +537,9 @@
       instrumentation->UpdateMethodsCode(it.second, GetQuickToInterpreterBridge());
     }
     for (ProfilingInfo* info : profiling_infos_) {
-      info->GetMethod()->SetProfilingInfo(nullptr);
+      if (!info->IsMethodBeingCompiled()) {
+        info->GetMethod()->SetProfilingInfo(nullptr);
+      }
     }
   }
 
@@ -574,12 +580,17 @@
       }
     }
 
-    // Free all profiling info.
-    for (ProfilingInfo* info : profiling_infos_) {
-      DCHECK(info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr);
-      mspace_free(data_mspace_, reinterpret_cast<uint8_t*>(info));
-    }
-    profiling_infos_.clear();
+    void* data_mspace = data_mspace_;
+    // Free all profiling infos of methods that were not being compiled.
+    auto profiling_kept_end = std::remove_if(profiling_infos_.begin(), profiling_infos_.end(),
+      [data_mspace] (ProfilingInfo* info) {
+        if (info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr) {
+          mspace_free(data_mspace, reinterpret_cast<uint8_t*>(info));
+          return true;
+        }
+        return false;
+      });
+    profiling_infos_.erase(profiling_kept_end, profiling_infos_.end());
 
     live_bitmap_.reset(nullptr);
     has_done_one_collection_ = true;
@@ -640,7 +651,7 @@
                                                       ArtMethod* method,
                                                       const std::vector<uint32_t>& entries) {
   size_t profile_info_size = RoundUp(
-      sizeof(ProfilingInfo) + sizeof(ProfilingInfo::InlineCache) * entries.size(),
+      sizeof(ProfilingInfo) + sizeof(InlineCache) * entries.size(),
       sizeof(void*));
   ScopedThreadSuspension sts(self, kSuspended);
   MutexLock mu(self, lock_);
@@ -677,5 +688,39 @@
   }
 }
 
+void JitCodeCache::GetCompiledArtMethods(const std::set<const std::string>& dex_base_locations,
+                                         std::set<ArtMethod*>& methods) {
+  MutexLock mu(Thread::Current(), lock_);
+  for (auto it : method_code_map_) {
+    if (ContainsElement(dex_base_locations, it.second->GetDexFile()->GetBaseLocation())) {
+      methods.insert(it.second);
+    }
+  }
+}
+
+uint64_t JitCodeCache::GetLastUpdateTimeNs() {
+  MutexLock mu(Thread::Current(), lock_);
+  return last_update_time_ns_;
+}
+
+bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self) {
+  if (ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+    return false;
+  }
+  MutexLock mu(self, lock_);
+  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  if (info == nullptr || info->IsMethodBeingCompiled()) {
+    return false;
+  }
+  info->SetIsMethodBeingCompiled(true);
+  return true;
+}
+
+void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSED) {
+  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  DCHECK(info->IsMethodBeingCompiled());
+  info->SetIsMethodBeingCompiled(false);
+}
+
 }  // namespace jit
 }  // namespace art
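
A simplified model of the NotifyCompilationOf()/DoneCompiling() handshake, with locking and the real ProfilingInfo layout elided: the flag rejects concurrent compilation requests and, per the collector change above, keeps the ProfilingInfo alive while the compiler is using it:

    #include <cstdio>

    // Minimal stand-in for ART's ProfilingInfo compile flag.
    struct ProfilingInfo {
      bool being_compiled = false;
    };

    bool NotifyCompilationOf(ProfilingInfo* info) {
      if (info == nullptr || info->being_compiled) {
        return false;  // No profiling data yet, or already being compiled.
      }
      info->being_compiled = true;
      return true;
    }

    void DoneCompiling(ProfilingInfo* info) {
      info->being_compiled = false;
    }

    int main() {
      ProfilingInfo info;
      if (NotifyCompilationOf(&info)) {
        // While the flag is set, a second request is rejected and the
        // collector must not free this ProfilingInfo.
        std::puts(NotifyCompilationOf(&info) ? "unexpected" : "second request rejected");
        DoneCompiling(&info);
      }
      return 0;
    }
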
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 13481e0..0ceb17a 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -23,7 +23,6 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "gc/accounting/bitmap.h"
-#include "gc/allocator/dlmalloc.h"
 #include "gc_root.h"
 #include "jni.h"
 #include "oat_file.h"
@@ -66,6 +65,14 @@
   // of methods that got JIT compiled, as we might have collected some.
   size_t NumberOfCompiledCode() REQUIRES(!lock_);
 
+  bool NotifyCompilationOf(ArtMethod* method, Thread* self)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
+  void DoneCompiling(ArtMethod* method, Thread* self)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
   // Allocate and write code and its metadata to the code cache.
   uint8_t* CommitCode(Thread* self,
                       ArtMethod* method,
@@ -139,6 +146,19 @@
 
   void* MoreCore(const void* mspace, intptr_t increment);
 
+  // Adds to `methods` all the compiled ArtMethods which are part of any of the given dex locations.
+  void GetCompiledArtMethods(const std::set<const std::string>& dex_base_locations,
+                             std::set<ArtMethod*>& methods)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  uint64_t GetLastUpdateTimeNs() REQUIRES(!lock_);
+
+  size_t GetCurrentCapacity() REQUIRES(!lock_) {
+    MutexLock lock(Thread::Current(), lock_);
+    return current_capacity_;
+  }
+
  private:
   // Take ownership of maps.
   JitCodeCache(MemMap* code_map,
@@ -228,6 +248,9 @@
   // Whether a collection has already been done on the current capacity.
   bool has_done_one_collection_ GUARDED_BY(lock_);
 
+  // Last time the code cache was updated.
+  uint64_t last_update_time_ns_ GUARDED_BY(lock_);
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
 
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
new file mode 100644
index 0000000..511b53d
--- /dev/null
+++ b/runtime/jit/offline_profiling_info.cc
@@ -0,0 +1,404 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "offline_profiling_info.h"
+
+#include <fstream>
+#include <set>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+
+#include "art_method-inl.h"
+#include "base/mutex.h"
+#include "base/stl_util.h"
+#include "jit/profiling_info.h"
+#include "safe_map.h"
+
+namespace art {
+
+// An arbitrary value to throttle save requests. Set to 500ms for now.
+static constexpr const uint64_t kMillisecondsToNano = 1000000;
+static constexpr const uint64_t kMinimumTimeBetweenSavesNs = 500 * kMillisecondsToNano;
+
+void OfflineProfilingInfo::SetTrackedDexLocations(
+      const std::vector<std::string>& dex_base_locations) {
+  tracked_dex_base_locations_.clear();
+  tracked_dex_base_locations_.insert(dex_base_locations.begin(), dex_base_locations.end());
+  VLOG(profiler) << "Tracking dex locations: " << Join(dex_base_locations, ':');
+}
+
+const std::set<const std::string>& OfflineProfilingInfo::GetTrackedDexLocations() const {
+  return tracked_dex_base_locations_;
+}
+
+bool OfflineProfilingInfo::NeedsSaving(uint64_t last_update_time_ns) const {
+  return !tracked_dex_base_locations_.empty() &&
+      (last_update_time_ns - last_update_time_ns_.LoadRelaxed() > kMinimumTimeBetweenSavesNs);
+}
+
+void OfflineProfilingInfo::SaveProfilingInfo(const std::string& filename,
+                                             uint64_t last_update_time_ns,
+                                             const std::set<ArtMethod*>& methods) {
+  if (!NeedsSaving(last_update_time_ns)) {
+    VLOG(profiler) << "No need to saved profile info to " << filename;
+    return;
+  }
+
+  if (methods.empty()) {
+    VLOG(profiler) << "No info to save to " << filename;
+    return;
+  }
+
+  DexFileToMethodsMap info;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    for (auto it = methods.begin(); it != methods.end(); it++) {
+      DCHECK(ContainsElement(tracked_dex_base_locations_, (*it)->GetDexFile()->GetBaseLocation()));
+      AddMethodInfo(*it, &info);
+    }
+  }
+
+  // This doesn't need locking because we are trying to lock the file for exclusive
+  // access and fail immediately if we can't.
+  if (Serialize(filename, info)) {
+    last_update_time_ns_.StoreRelaxed(last_update_time_ns);
+    VLOG(profiler) << "Successfully saved profile info to "
+                   << filename << " with time stamp: " << last_update_time_ns;
+  }
+}
+
+void OfflineProfilingInfo::AddMethodInfo(ArtMethod* method, DexFileToMethodsMap* info) {
+  DCHECK(method != nullptr);
+  const DexFile* dex_file = method->GetDexFile();
+
+  auto info_it = info->find(dex_file);
+  if (info_it == info->end()) {
+    info_it = info->Put(dex_file, std::set<uint32_t>());
+  }
+  info_it->second.insert(method->GetDexMethodIndex());
+}
+
+enum OpenMode {
+  READ,
+  READ_WRITE
+};
+
+static int OpenFile(const std::string& filename, OpenMode open_mode) {
+  int fd = -1;
+  switch (open_mode) {
+    case READ:
+      fd = open(filename.c_str(), O_RDONLY);
+      break;
+    case READ_WRITE:
+      // TODO(calin) allow the shared uid of the app to access the file.
+      fd = open(filename.c_str(),
+                    O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC,
+                    S_IRUSR | S_IWUSR);
+      break;
+  }
+
+  if (fd < 0) {
+    PLOG(WARNING) << "Failed to open profile file " << filename;
+    return -1;
+  }
+
+  // Lock the file for exclusive access but don't wait if we can't lock it.
+  int err = flock(fd, LOCK_EX | LOCK_NB);
+  if (err < 0) {
+    PLOG(WARNING) << "Failed to lock profile file " << filename;
+    return -1;
+  }
+  return fd;
+}
+
+static bool CloseDescriptorForFile(int fd, const std::string& filename) {
+  // Now unlock the file, allowing another process in.
+  int err = flock(fd, LOCK_UN);
+  if (err < 0) {
+    PLOG(WARNING) << "Failed to unlock profile file " << filename;
+    return false;
+  }
+
+  // Done, close the file.
+  err = ::close(fd);
+  if (err < 0) {
+    PLOG(WARNING) << "Failed to close descriptor for profile file" << filename;
+    return false;
+  }
+
+  return true;
+}
+
+static void WriteToFile(int fd, const std::ostringstream& os) {
+  std::string data(os.str());
+  const char *p = data.c_str();
+  size_t length = data.length();
+  do {
+    ssize_t n = ::write(fd, p, length);
+    if (n < 0) {
+      PLOG(WARNING) << "Error writing profile data";
+      return;
+    }
+    p += n;
+    length -= n;
+  } while (length > 0);
+}
+
+static constexpr const char kFieldSeparator = ',';
+static constexpr const char kLineSeparator = '\n';
+
+/**
+ * Serialization format:
+ *    dex_location1,dex_location_checksum1,method_id11,method_id12...
+ *    dex_location2,dex_location_checksum2,method_id21,method_id22...
+ * e.g.
+ *    /system/priv-app/app/app.apk,131232145,11,23,454,54
+ *    /system/priv-app/app/app.apk:classes5.dex,218490184,39,13,49,1
+ **/
+bool OfflineProfilingInfo::Serialize(const std::string& filename,
+                                     const DexFileToMethodsMap& info) const {
+  int fd = OpenFile(filename, READ_WRITE);
+  if (fd == -1) {
+    return false;
+  }
+
+  // TODO(calin): Merge with a previous existing profile.
+  // TODO(calin): Profile this and see how much memory it takes. If too much,
+  // write to file directly.
+  std::ostringstream os;
+  for (auto it : info) {
+    const DexFile* dex_file = it.first;
+    const std::set<uint32_t>& method_dex_ids = it.second;
+
+    os << dex_file->GetLocation()
+        << kFieldSeparator
+        << dex_file->GetLocationChecksum();
+    for (auto method_it : method_dex_ids) {
+      os << kFieldSeparator << method_it;
+    }
+    os << kLineSeparator;
+  }
+
+  WriteToFile(fd, os);
+
+  return CloseDescriptorForFile(fd, filename);
+}
+
+// TODO(calin): This is a duplicate of Utils::Split that fixes the case where the first character
+// is the separator. Merge the fix into Utils::Split once verified that it doesn't break its users.
+static void SplitString(const std::string& s, char separator, std::vector<std::string>* result) {
+  const char* p = s.data();
+  const char* end = p + s.size();
+  // Check if the first character is the separator.
+  if (p != end && *p == separator) {
+    result->push_back("");
+    ++p;
+  }
+  // Process the rest of the characters.
+  while (p != end) {
+    if (*p == separator) {
+      ++p;
+    } else {
+      const char* start = p;
+      while (++p != end && *p != separator) {
+        // Skip to the next occurrence of the separator.
+      }
+      result->push_back(std::string(start, p - start));
+    }
+  }
+}
+
+bool ProfileCompilationInfo::ProcessLine(const std::string& line,
+                                         const std::vector<const DexFile*>& dex_files) {
+  std::vector<std::string> parts;
+  SplitString(line, kFieldSeparator, &parts);
+  if (parts.size() < 3) {
+    LOG(WARNING) << "Invalid line: " << line;
+    return false;
+  }
+
+  const std::string& dex_location = parts[0];
+  uint32_t checksum;
+  if (!ParseInt(parts[1].c_str(), &checksum)) {
+    return false;
+  }
+
+  const DexFile* current_dex_file = nullptr;
+  for (auto dex_file : dex_files) {
+    if (dex_file->GetLocation() == dex_location) {
+      if (checksum != dex_file->GetLocationChecksum()) {
+        LOG(WARNING) << "Checksum mismatch for "
+            << dex_file->GetLocation() << " when parsing " << filename_;
+        return false;
+      }
+      current_dex_file = dex_file;
+      break;
+    }
+  }
+  if (current_dex_file == nullptr) {
+    return true;
+  }
+
+  for (size_t i = 2; i < parts.size(); i++) {
+    uint32_t method_idx;
+    if (!ParseInt(parts[i].c_str(), &method_idx)) {
+      LOG(WARNING) << "Cannot parse method_idx " << parts[i];
+      return false;
+    }
+    uint16_t class_idx = current_dex_file->GetMethodId(method_idx).class_idx_;
+    auto info_it = info_.find(current_dex_file);
+    if (info_it == info_.end()) {
+      info_it = info_.Put(current_dex_file, ClassToMethodsMap());
+    }
+    ClassToMethodsMap& class_map = info_it->second;
+    auto class_it = class_map.find(class_idx);
+    if (class_it == class_map.end()) {
+      class_it = class_map.Put(class_idx, std::set<uint32_t>());
+    }
+    class_it->second.insert(method_idx);
+  }
+  return true;
+}
+
+// Parses the buffer (of length n) starting from start_from and identifies new lines
+// based on kLineSeparator marker.
+// Returns the first position after kLineSeparator in the buffer (starting from start_from),
+// or -1 if the marker doesn't appear.
+// The processed characters are appended to the given line.
+static int GetLineFromBuffer(char* buffer, int n, int start_from, std::string& line) {
+  if (start_from >= n) {
+    return -1;
+  }
+  int new_line_pos = -1;
+  for (int i = start_from; i < n; i++) {
+    if (buffer[i] == kLineSeparator) {
+      new_line_pos = i;
+      break;
+    }
+  }
+  int append_limit = new_line_pos == -1 ? n : new_line_pos;
+  line.append(buffer + start_from, append_limit - start_from);
+  // Jump over kLineSeparator and return the position of the next character.
+  return new_line_pos == -1 ? new_line_pos : new_line_pos + 1;
+}
+
+bool ProfileCompilationInfo::Load(const std::vector<const DexFile*>& dex_files) {
+  if (dex_files.empty()) {
+    return true;
+  }
+  if (kIsDebugBuild) {
+    // In debug builds verify that the locations are unique.
+    std::set<std::string> locations;
+    for (auto dex_file : dex_files) {
+      const std::string& location = dex_file->GetLocation();
+      DCHECK(locations.find(location) == locations.end())
+          << "DexFiles appear to belong to different apks."
+          << " There are multiple dex files with the same location: "
+          << location;
+      locations.insert(location);
+    }
+  }
+  info_.clear();
+
+  int fd = OpenFile(filename_, READ);
+  if (fd == -1) {
+    return false;
+  }
+
+  std::string current_line;
+  const int kBufferSize = 1024;
+  char buffer[kBufferSize];
+  bool success = true;
+
+  while (success) {
+    int n = read(fd, buffer, kBufferSize);
+    if (n < 0) {
+      PLOG(WARNING) << "Error when reading profile file " << filename_;
+      success = false;
+      break;
+    } else if (n == 0) {
+      break;
+    }
+    // Detect the new lines from the buffer. If we manage to complete a line,
+    // process it. Otherwise append to the current line.
+    int current_start_pos = 0;
+    while (current_start_pos < n) {
+      current_start_pos = GetLineFromBuffer(buffer, n, current_start_pos, current_line);
+      if (current_start_pos == -1) {
+        break;
+      }
+      if (!ProcessLine(current_line, dex_files)) {
+        success = false;
+        break;
+      }
+      // Reset the current line (we just processed it).
+      current_line.clear();
+    }
+  }
+  if (!success) {
+    info_.clear();
+  }
+  return CloseDescriptorForFile(fd, filename_) && success;
+}
+
+bool ProfileCompilationInfo::ContainsMethod(const MethodReference& method_ref) const {
+  auto info_it = info_.find(method_ref.dex_file);
+  if (info_it != info_.end()) {
+    uint16_t class_idx = method_ref.dex_file->GetMethodId(method_ref.dex_method_index).class_idx_;
+    const ClassToMethodsMap& class_map = info_it->second;
+    auto class_it = class_map.find(class_idx);
+    if (class_it != class_map.end()) {
+      const std::set<uint32_t>& methods = class_it->second;
+      return methods.find(method_ref.dex_method_index) != methods.end();
+    }
+    return false;
+  }
+  return false;
+}
+
+std::string ProfileCompilationInfo::DumpInfo(bool print_full_dex_location) const {
+  std::ostringstream os;
+  if (info_.empty()) {
+    return "ProfileInfo: empty";
+  }
+
+  os << "ProfileInfo:";
+
+  // Use an additional map to achieve a deterministic order based on the dex locations.
+  SafeMap<const std::string, const DexFile*> dex_locations_map;
+  for (auto info_it : info_) {
+    dex_locations_map.Put(info_it.first->GetLocation(), info_it.first);
+  }
+
+  const std::string kFirstDexFileKeySubstitute = ":classes.dex";
+  for (auto dex_file_it : dex_locations_map) {
+    os << "\n";
+    const std::string& location = dex_file_it.first;
+    const DexFile* dex_file = dex_file_it.second;
+    if (print_full_dex_location) {
+      os << location;
+    } else {
+      // Replace the (empty) multidex suffix of the first key with a substitute for easier reading.
+      std::string multidex_suffix = DexFile::GetMultiDexSuffix(location);
+      os << (multidex_suffix.empty() ? kFirstDexFileKeySubstitute : multidex_suffix);
+    }
+    for (auto class_it : info_.find(dex_file)->second) {
+      for (auto method_it : class_it.second) {
+        os << "\n  " << PrettyMethod(method_it, *dex_file, true);
+      }
+    }
+  }
+  return os.str();
+}
+
+}  // namespace art
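
For reference, the on-disk text format implied by ProcessLine() above is one line per dex file:
the dex location, its checksum, then the indices of the profiled methods. A hypothetical profile
(field separator assumed to be ',' per the constants defined earlier in
offline_profiling_info.cc; all values illustrative):

    classes.dex,1217838494,0,15,42
    classes2.dex,3962628249,7

Each method index is resolved against the matching DexFile, grouped under the class_idx_ of its
MethodId, and inserted into that class's method set.
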
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
new file mode 100644
index 0000000..8c5ffbe
--- /dev/null
+++ b/runtime/jit/offline_profiling_info.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
+#define ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
+
+#include <set>
+
+#include "atomic.h"
+#include "dex_file.h"
+#include "method_reference.h"
+#include "safe_map.h"
+
+namespace art {
+
+class ArtMethod;
+
+/**
+ * Profiling information in a format that can be serialized to disk.
+ * It is a serialization-friendly format based on information collected
+ * by the interpreter (ProfileInfo).
+ * Currently it stores only the hot compiled methods.
+ */
+class OfflineProfilingInfo {
+ public:
+  bool NeedsSaving(uint64_t last_update_time_ns) const;
+  void SaveProfilingInfo(const std::string& filename,
+                         uint64_t last_update_time_ns,
+                         const std::set<ArtMethod*>& methods);
+  void SetTrackedDexLocations(const std::vector<std::string>& dex_locations);
+  const std::set<const std::string>& GetTrackedDexLocations() const;
+
+ private:
+  // Map identifying the location of the profiled methods.
+  // dex_file_ -> [dex_method_index]+
+  using DexFileToMethodsMap = SafeMap<const DexFile*, std::set<uint32_t>>;
+
+  void AddMethodInfo(ArtMethod* method, DexFileToMethodsMap* info)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  bool Serialize(const std::string& filename, const DexFileToMethodsMap& info) const;
+
+  // TODO(calin): Verify if Atomic is really needed (are we sure this is only called
+  // from a single thread?)
+  Atomic<uint64_t> last_update_time_ns_;
+
+  std::set<const std::string> tracked_dex_base_locations_;
+};
+
+/**
+ * Profile information in a format suitable for querying by the compiler when
+ * performing profile-guided compilation.
+ */
+class ProfileCompilationInfo {
+ public:
+  // Constructs a ProfileCompilationInfo backed by the provided file.
+  explicit ProfileCompilationInfo(const std::string& filename) : filename_(filename) {}
+
+  // Loads profile information corresponding to the provided dex files.
+  // The dex files' multidex suffixes must be unique.
+  // This resets the state of the profiling information
+  // (i.e. all previously loaded info is cleared).
+  bool Load(const std::vector<const DexFile*>& dex_files);
+
+  // Returns true if the method reference is present in the profiling info.
+  bool ContainsMethod(const MethodReference& method_ref) const;
+
+  const std::string& GetFilename() const { return filename_; }
+
+  // Dumps all the loaded profile info into a string and returns it.
+  // This is intended for testing and debugging.
+  std::string DumpInfo(bool print_full_dex_location = true) const;
+
+ private:
+  bool ProcessLine(const std::string& line,
+                   const std::vector<const DexFile*>& dex_files);
+
+  using ClassToMethodsMap = SafeMap<uint32_t, std::set<uint32_t>>;
+  // Map identifying the location of the profiled methods.
+  // dex_file -> class_index -> [dex_method_index]+
+  using DexFileToProfileInfoMap = SafeMap<const DexFile*, ClassToMethodsMap>;
+
+  const std::string filename_;
+  DexFileToProfileInfoMap info_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
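
A minimal usage sketch of the ProfileCompilationInfo API declared above. The profile path and
method index are hypothetical, and MethodReference is assumed to be constructible from a
DexFile* plus a method index (per method_reference.h):

    // Sketch only: assumes ART internal headers and an already-opened dex file list.
    bool ShouldCompile(const std::vector<const DexFile*>& dex_files, uint32_t method_idx) {
      ProfileCompilationInfo info("/data/misc/profiles/app.prof");  // Path hypothetical.
      if (!info.Load(dex_files)) {
        return false;  // Missing or unparsable profile: fall back to no PGO.
      }
      return info.ContainsMethod(MethodReference(dex_files[0], method_idx));
    }
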
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 2e52b1b..3820592 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -54,28 +54,29 @@
     code_ptr += instruction.SizeInCodeUnits();
   }
 
-  // If there is no instruction we are interested in, no need to create a `ProfilingInfo`
-  // object, it will never be filled.
-  if (entries.empty()) {
-    return true;
-  }
+  // We always create a `ProfilingInfo` object, even if there is no instruction we are
+  // interested in. The JIT code cache internally uses it.
 
   // Allocate the `ProfilingInfo` object in the JIT's data space.
   jit::JitCodeCache* code_cache = Runtime::Current()->GetJit()->GetCodeCache();
   return code_cache->AddProfilingInfo(self, method, entries, retry_allocation) != nullptr;
 }
 
-void ProfilingInfo::AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls) {
+InlineCache* ProfilingInfo::GetInlineCache(uint32_t dex_pc) {
   InlineCache* cache = nullptr;
   // TODO: binary search if array is too long.
   for (size_t i = 0; i < number_of_inline_caches_; ++i) {
-    if (cache_[i].dex_pc == dex_pc) {
+    if (cache_[i].dex_pc_ == dex_pc) {
       cache = &cache_[i];
       break;
     }
   }
-  DCHECK(cache != nullptr);
+  return cache;
+}
 
+void ProfilingInfo::AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls) {
+  InlineCache* cache = GetInlineCache(dex_pc);
+  CHECK(cache != nullptr) << PrettyMethod(method_) << "@" << dex_pc;
   for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
     mirror::Class* existing = cache->classes_[i].Read();
     if (existing == cls) {
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index b13a315..ddaf02f 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -25,6 +25,7 @@
 namespace art {
 
 class ArtMethod;
+class ProfilingInfo;
 
 namespace jit {
 class JitCodeCache;
@@ -34,6 +35,49 @@
 class Class;
 }
 
+// Structure to store the classes seen at runtime for a specific instruction.
+// Once the classes_ array is full, we consider the INVOKE to be megamorphic.
+class InlineCache {
+ public:
+  bool IsMonomorphic() const {
+    DCHECK_GE(kIndividualCacheSize, 2);
+    return !classes_[0].IsNull() && classes_[1].IsNull();
+  }
+
+  bool IsMegamorphic() const {
+    for (size_t i = 0; i < kIndividualCacheSize; ++i) {
+      if (classes_[i].IsNull()) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  mirror::Class* GetMonomorphicType() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    // Note that we cannot ensure the inline cache is actually monomorphic
+    // at this point, as other threads may have updated it.
+    return classes_[0].Read();
+  }
+
+  bool IsUnitialized() const {
+    return classes_[0].IsNull();
+  }
+
+  bool IsPolymorphic() const {
+    DCHECK_GE(kIndividualCacheSize, 3);
+    return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
+  }
+
+ private:
+  static constexpr uint16_t kIndividualCacheSize = 5;
+  uint32_t dex_pc_;
+  GcRoot<mirror::Class> classes_[kIndividualCacheSize];
+
+  friend class ProfilingInfo;
+
+  DISALLOW_COPY_AND_ASSIGN(InlineCache);
+};
+
 /**
  * Profiling info for a method, created and filled by the interpreter once the
  * method is warm, and used by the compiler to drive optimizations.
@@ -67,44 +111,24 @@
     return method_;
   }
 
+  InlineCache* GetInlineCache(uint32_t dex_pc);
+
+  bool IsMethodBeingCompiled() const {
+    return is_method_being_compiled_;
+  }
+
+  void SetIsMethodBeingCompiled(bool value) {
+    is_method_being_compiled_ = value;
+  }
+
  private:
-  // Structure to store the classes seen at runtime for a specific instruction.
-  // Once the classes_ array is full, we consider the INVOKE to be megamorphic.
-  struct InlineCache {
-    bool IsMonomorphic() const {
-      DCHECK_GE(kIndividualCacheSize, 2);
-      return !classes_[0].IsNull() && classes_[1].IsNull();
-    }
-
-    bool IsMegamorphic() const {
-      for (size_t i = 0; i < kIndividualCacheSize; ++i) {
-        if (classes_[i].IsNull()) {
-          return false;
-        }
-      }
-      return true;
-    }
-
-    bool IsUnitialized() const {
-      return classes_[0].IsNull();
-    }
-
-    bool IsPolymorphic() const {
-      DCHECK_GE(kIndividualCacheSize, 3);
-      return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
-    }
-
-    static constexpr uint16_t kIndividualCacheSize = 5;
-    uint32_t dex_pc;
-    GcRoot<mirror::Class> classes_[kIndividualCacheSize];
-  };
-
   ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
       : number_of_inline_caches_(entries.size()),
-        method_(method) {
+        method_(method),
+        is_method_being_compiled_(false) {
     memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
     for (size_t i = 0; i < number_of_inline_caches_; ++i) {
-      cache_[i].dex_pc = entries[i];
+      cache_[i].dex_pc_ = entries[i];
     }
   }
 
@@ -114,6 +138,11 @@
   // Method this profiling info is for.
   ArtMethod* const method_;
 
+  // Whether the ArtMethod is currently being compiled. This flag
+  // is implicitly guarded by the JIT code cache lock.
+  // TODO: Make the JIT code cache lock global.
+  bool is_method_being_compiled_;
+
   // Dynamically allocated array of size `number_of_inline_caches_`.
   InlineCache cache_[0];
 
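
The InlineCache states above are what drive devirtualization decisions; a sketch of how a JIT
compiler pass might consume them, using only the public API shown here (the surrounding pass is
hypothetical):

    void EmitInvoke(ProfilingInfo* profiling_info, uint32_t dex_pc) {
      InlineCache* cache = profiling_info->GetInlineCache(dex_pc);
      if (cache == nullptr || cache->IsUnitialized()) {
        // No type feedback yet: emit a plain virtual dispatch.
      } else if (cache->IsMonomorphic()) {
        // One receiver class seen so far: guard on GetMonomorphicType() and inline.
      } else if (cache->IsMegamorphic()) {
        // classes_ is full: inlining is unlikely to pay off.
      } else {
        // Polymorphic: a short chain of guarded inlines may still help.
      }
    }
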
diff --git a/runtime/jni_env_ext.cc b/runtime/jni_env_ext.cc
index dab1040..aa25f67 100644
--- a/runtime/jni_env_ext.cc
+++ b/runtime/jni_env_ext.cc
@@ -105,9 +105,32 @@
   stacked_local_ref_cookies.pop_back();
 }
 
-Offset JNIEnvExt::SegmentStateOffset() {
-  return Offset(OFFSETOF_MEMBER(JNIEnvExt, locals) +
-                IndirectReferenceTable::SegmentStateOffset().Int32Value());
+// Note: the offset code is brittle, as we can't use OFFSETOF_MEMBER or offsetof easily. Thus, there
+//       are tests in jni_internal_test to match the results against the actual values.
+
+// This is encoding the knowledge of the structure and layout of JNIEnv fields.
+static size_t JNIEnvSize(size_t pointer_size) {
+  // A single pointer.
+  return pointer_size;
+}
+
+Offset JNIEnvExt::SegmentStateOffset(size_t pointer_size) {
+  size_t locals_offset = JNIEnvSize(pointer_size) +
+                         2 * pointer_size +          // Thread* self + JavaVMExt* vm.
+                         4 +                         // local_ref_cookie.
+                         (pointer_size - 4);         // Padding.
+  size_t irt_segment_state_offset =
+      IndirectReferenceTable::SegmentStateOffset(pointer_size).Int32Value();
+  return Offset(locals_offset + irt_segment_state_offset);
+}
+
+Offset JNIEnvExt::LocalRefCookieOffset(size_t pointer_size) {
+  return Offset(JNIEnvSize(pointer_size) +
+                2 * pointer_size);          // Thread* self + JavaVMExt* vm
+}
+
+Offset JNIEnvExt::SelfOffset(size_t pointer_size) {
+  return Offset(JNIEnvSize(pointer_size));
 }
 
 // Use some defining part of the caller's frame as the identifying mark for the JNI segment.
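
Working the layout above through by hand (the point of taking pointer_size as a parameter is
that a cross-compiler can compute target offsets instead of relying on the host's
OFFSETOF_MEMBER):

    // pointer_size == 8: JNIEnvSize = 8; self + vm = 16; local_ref_cookie = 4;
    //                    padding = 8 - 4 = 4   ->  locals_offset = 32
    // pointer_size == 4: JNIEnvSize = 4; self + vm = 8;  local_ref_cookie = 4;
    //                    padding = 4 - 4 = 0   ->  locals_offset = 16
    // SegmentStateOffset() then adds the IRT-internal segment-state offset on top.
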
diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h
index 3828ff0..2f8decf 100644
--- a/runtime/jni_env_ext.h
+++ b/runtime/jni_env_ext.h
@@ -50,15 +50,9 @@
   T AddLocalReference(mirror::Object* obj)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static Offset SegmentStateOffset();
-
-  static Offset LocalRefCookieOffset() {
-    return Offset(OFFSETOF_MEMBER(JNIEnvExt, local_ref_cookie));
-  }
-
-  static Offset SelfOffset() {
-    return Offset(OFFSETOF_MEMBER(JNIEnvExt, self));
-  }
+  static Offset SegmentStateOffset(size_t pointer_size);
+  static Offset LocalRefCookieOffset(size_t pointer_size);
+  static Offset SelfOffset(size_t pointer_size);
 
   jobject NewLocalRef(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
   void DeleteLocalRef(jobject obj) SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 415109f..cb67ee3 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -316,12 +316,7 @@
 static ArtMethod* FindMethod(mirror::Class* c, const StringPiece& name, const StringPiece& sig)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-  for (auto& method : c->GetDirectMethods(pointer_size)) {
-    if (kNative == method.IsNative() && name == method.GetName() && method.GetSignature() == sig) {
-      return &method;
-    }
-  }
-  for (auto& method : c->GetVirtualMethods(pointer_size)) {
+  for (auto& method : c->GetMethods(pointer_size)) {
     if (kNative == method.IsNative() && name == method.GetName() && method.GetSignature() == sig) {
       return &method;
     }
@@ -1689,7 +1684,8 @@
     } else {
       CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
       const jchar* chars = s->GetValue();
-      ConvertUtf16ToModifiedUtf8(buf, chars + start, length);
+      size_t bytes = CountUtf8Bytes(chars + start, length);
+      ConvertUtf16ToModifiedUtf8(buf, bytes, chars + start, length);
     }
   }
 
@@ -1772,7 +1768,7 @@
     char* bytes = new char[byte_count + 1];
     CHECK(bytes != nullptr);  // bionic aborts anyway.
     const uint16_t* chars = s->GetValue();
-    ConvertUtf16ToModifiedUtf8(bytes, chars, s->GetLength());
+    ConvertUtf16ToModifiedUtf8(bytes, byte_count, chars, s->GetLength());
     bytes[byte_count] = '\0';
     return bytes;
   }
@@ -2219,13 +2215,7 @@
 
     size_t unregistered_count = 0;
     auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-    for (auto& m : c->GetDirectMethods(pointer_size)) {
-      if (m.IsNative()) {
-        m.UnregisterNative();
-        unregistered_count++;
-      }
-    }
-    for (auto& m : c->GetVirtualMethods(pointer_size)) {
+    for (auto& m : c->GetMethods(pointer_size)) {
       if (m.IsNative()) {
         m.UnregisterNative();
         unregistered_count++;
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 649df5f..c718466 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -18,7 +18,9 @@
 
 #include "art_method-inl.h"
 #include "common_compiler_test.h"
+#include "indirect_reference_table.h"
 #include "java_vm_ext.h"
+#include "jni_env_ext.h"
 #include "mirror/string-inl.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
@@ -2089,8 +2091,7 @@
   MakeExecutable(nullptr, "java.lang.Class");
   MakeExecutable(nullptr, "java.lang.Object");
   MakeExecutable(nullptr, "java.nio.DirectByteBuffer");
-  MakeExecutable(nullptr, "java.nio.MemoryBlock");
-  MakeExecutable(nullptr, "java.nio.MemoryBlock$UnmanagedBlock");
+  MakeExecutable(nullptr, "java.nio.Bits");
   MakeExecutable(nullptr, "java.nio.MappedByteBuffer");
   MakeExecutable(nullptr, "java.nio.ByteBuffer");
   MakeExecutable(nullptr, "java.nio.Buffer");
@@ -2210,4 +2211,92 @@
   check_jni_abort_catcher.Check("Still holding a locked object on JNI end");
 }
 
+static bool IsLocked(JNIEnv* env, jobject jobj) {
+  ScopedObjectAccess soa(env);
+  LockWord lock_word = soa.Decode<mirror::Object*>(jobj)->GetLockWord(true);
+  switch (lock_word.GetState()) {
+    case LockWord::kHashCode:
+    case LockWord::kUnlocked:
+      return false;
+    case LockWord::kThinLocked:
+      return true;
+    case LockWord::kFatLocked:
+      return lock_word.FatLockMonitor()->IsLocked();
+    default: {
+      LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
+      UNREACHABLE();
+    }
+  }
+}
+
+TEST_F(JniInternalTest, DetachThreadUnlockJNIMonitors) {
+  // We need to lock an object, detach, reattach, and check the locks.
+  //
+  // As re-attaching will create a different thread, we need to use a global
+  // ref to keep the object around.
+
+  // Create an object to torture.
+  jobject global_ref;
+  {
+    jclass object_class = env_->FindClass("java/lang/Object");
+    ASSERT_NE(object_class, nullptr);
+    jobject object = env_->AllocObject(object_class);
+    ASSERT_NE(object, nullptr);
+    global_ref = env_->NewGlobalRef(object);
+  }
+
+  // Lock it.
+  env_->MonitorEnter(global_ref);
+  ASSERT_TRUE(IsLocked(env_, global_ref));
+
+  // Detach and re-attach.
+  jint detach_result = vm_->DetachCurrentThread();
+  ASSERT_EQ(detach_result, JNI_OK);
+  jint attach_result = vm_->AttachCurrentThread(&env_, nullptr);
+  ASSERT_EQ(attach_result, JNI_OK);
+
+  // Look at the global ref, check whether it's still locked.
+  ASSERT_FALSE(IsLocked(env_, global_ref));
+
+  // Delete the global ref.
+  env_->DeleteGlobalRef(global_ref);
+}
+
+// Test the offset computation of IndirectReferenceTable offsets. b/26071368.
+TEST_F(JniInternalTest, IndirectReferenceTableOffsets) {
+  // The segment_state_ field is private, and we want to avoid a friend declaration. So we'll
+  // check by modifying memory.
+  // The parameters don't really matter here.
+  IndirectReferenceTable irt(5, 5, IndirectRefKind::kGlobal, true);
+  uint32_t old_state = irt.GetSegmentState();
+
+  // Write some new state directly. We invert parts of old_state to ensure a new value.
+  uint32_t new_state = old_state ^ 0x07705005;
+  ASSERT_NE(old_state, new_state);
+
+  uint8_t* base = reinterpret_cast<uint8_t*>(&irt);
+  int32_t segment_state_offset =
+      IndirectReferenceTable::SegmentStateOffset(sizeof(void*)).Int32Value();
+  *reinterpret_cast<uint32_t*>(base + segment_state_offset) = new_state;
+
+  // Read and compare.
+  EXPECT_EQ(new_state, irt.GetSegmentState());
+}
+
+// Test the offset computation of JNIEnvExt offsets. b/26071368.
+TEST_F(JniInternalTest, JNIEnvExtOffsets) {
+  EXPECT_EQ(OFFSETOF_MEMBER(JNIEnvExt, local_ref_cookie),
+            JNIEnvExt::LocalRefCookieOffset(sizeof(void*)).Int32Value());
+
+  EXPECT_EQ(OFFSETOF_MEMBER(JNIEnvExt, self), JNIEnvExt::SelfOffset(sizeof(void*)).Int32Value());
+
+  // segment_state_ is private in the IndirectReferenceTable, so this test isn't as strong as
+  // we'd like it to be.
+  int32_t segment_state_now =
+      OFFSETOF_MEMBER(JNIEnvExt, locals) +
+      IndirectReferenceTable::SegmentStateOffset(sizeof(void*)).Int32Value();
+  int32_t segment_state_computed = JNIEnvExt::SegmentStateOffset(sizeof(void*)).Int32Value();
+  EXPECT_EQ(segment_state_now, segment_state_computed);
+}
+
 }  // namespace art
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 4b2ac20..e133847 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -154,8 +154,10 @@
   }
 
   std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid(), true));
-  if (map.get() == nullptr) {
-    *error_msg = StringPrintf("Failed to build process map");
+  if (map == nullptr) {
+    if (error_msg != nullptr) {
+      *error_msg = StringPrintf("Failed to build process map");
+    }
     return false;
   }
   for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) {
@@ -164,9 +166,11 @@
       return true;
     }
   }
-  PrintFileToLog("/proc/self/maps", LogSeverity::ERROR);
-  *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " does not overlap "
-                            "any existing map. See process maps in the log.", begin, end);
+  if (error_msg != nullptr) {
+    PrintFileToLog("/proc/self/maps", LogSeverity::ERROR);
+    *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " does not overlap "
+                              "any existing map. See process maps in the log.", begin, end);
+  }
   return false;
 }
 
@@ -239,15 +243,16 @@
   std::string error_detail;
   CheckNonOverlapping(expected, limit, &error_detail);
 
-  std::ostringstream os;
-  os <<  StringPrintf("Failed to mmap at expected address, mapped at "
-                      "0x%08" PRIxPTR " instead of 0x%08" PRIxPTR,
-                      actual, expected);
-  if (!error_detail.empty()) {
-    os << " : " << error_detail;
+  if (error_msg != nullptr) {
+    std::ostringstream os;
+    os <<  StringPrintf("Failed to mmap at expected address, mapped at "
+                        "0x%08" PRIxPTR " instead of 0x%08" PRIxPTR,
+                        actual, expected);
+    if (!error_detail.empty()) {
+      os << " : " << error_detail;
+    }
+    *error_msg = os.str();
   }
-
-  *error_msg = os.str();
   return false;
 }
 
@@ -379,7 +384,8 @@
     // Only use this if you actually made the page reservation yourself.
     CHECK(expected_ptr != nullptr);
 
-    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << *error_msg;
+    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg))
+        << ((error_msg != nullptr) ? *error_msg : std::string());
     flags |= MAP_FIXED;
   } else {
     CHECK_EQ(0, flags & MAP_FIXED);
@@ -414,15 +420,17 @@
                                                            page_aligned_offset,
                                                            low_4gb));
   if (actual == MAP_FAILED) {
-    auto saved_errno = errno;
+    if (error_msg != nullptr) {
+      auto saved_errno = errno;
 
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+      PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
 
-    *error_msg = StringPrintf("mmap(%p, %zd, 0x%x, 0x%x, %d, %" PRId64
-                              ") of file '%s' failed: %s. See process maps in the log.",
-                              page_aligned_expected, page_aligned_byte_count, prot, flags, fd,
-                              static_cast<int64_t>(page_aligned_offset), filename,
-                              strerror(saved_errno));
+      *error_msg = StringPrintf("mmap(%p, %zd, 0x%x, 0x%x, %d, %" PRId64
+                                ") of file '%s' failed: %s. See process maps in the log.",
+                                page_aligned_expected, page_aligned_byte_count, prot, flags, fd,
+                                static_cast<int64_t>(page_aligned_offset), filename,
+                                strerror(saved_errno));
+    }
     return nullptr;
   }
   std::ostringstream check_map_request_error_msg;
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index a67a925..efce09a 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -99,11 +99,12 @@
                             error_msg);
   }
 
-  // Map part of a file, taking care of non-page aligned offsets.  The
-  // "start" offset is absolute, not relative. This version allows
-  // requesting a specific address for the base of the
-  // mapping. "reuse" allows us to create a view into an existing
-  // mapping where we do not take ownership of the memory.
+  // Map part of a file, taking care of non-page aligned offsets.  The "start" offset is absolute,
+  // not relative. This version allows requesting a specific address for the base of the mapping.
+  // "reuse" allows us to create a view into an existing mapping where we do not take ownership of
+  // the memory. If error_msg is null, we do not print /proc/maps to the log when
+  // MapFileAtAddress fails. This helps improve performance of the failure case, since reading
+  // and printing /proc/maps takes several milliseconds in the worst case.
   //
   // On success, returns a MemMap instance.  On failure, returns null.
   static MemMap* MapFileAtAddress(uint8_t* addr,
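
The optional error_msg is a general pattern: gate expensive failure diagnostics on the caller
actually wanting them. A standalone sketch of the same idea (names hypothetical, not ART API):

    #include <string>

    static bool TryOperation(bool fail, std::string* error_msg) {
      if (fail) {
        // Only build the costly diagnostic (e.g. dumping /proc/self/maps)
        // when the caller passed a non-null out-parameter.
        if (error_msg != nullptr) {
          *error_msg = "operation failed; see process maps in the log.";
        }
        return false;
      }
      return true;
    }
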
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 9e416dc..ef4fe15 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -22,13 +22,14 @@
 #include "art_field-inl.h"
 #include "art_method.h"
 #include "art_method-inl.h"
+#include "base/array_slice.h"
+#include "base/length_prefixed_array.h"
 #include "class_loader.h"
 #include "common_throws.h"
 #include "dex_cache.h"
 #include "dex_file.h"
 #include "gc/heap-inl.h"
 #include "iftable.h"
-#include "length_prefixed_array.h"
 #include "object_array-inl.h"
 #include "read_barrier-inl.h"
 #include "reference-inl.h"
@@ -62,61 +63,148 @@
   return GetFieldObject<DexCache, kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_));
 }
 
-inline LengthPrefixedArray<ArtMethod>* Class::GetDirectMethodsPtr() {
+inline uint32_t Class::GetCopiedMethodsStartOffset() {
+  return GetFieldShort(OFFSET_OF_OBJECT_MEMBER(Class, copied_methods_offset_));
+}
+
+inline uint32_t Class::GetDirectMethodsStartOffset() {
+  return 0;
+}
+
+inline uint32_t Class::GetVirtualMethodsStartOffset() {
+  return GetFieldShort(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_offset_));
+}
+
+template<VerifyObjectFlags kVerifyFlags>
+inline ArraySlice<ArtMethod> Class::GetDirectMethodsSlice(size_t pointer_size) {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetDirectMethodsPtrUnchecked();
+  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
+  return GetDirectMethodsSliceUnchecked(pointer_size);
 }
 
-inline LengthPrefixedArray<ArtMethod>* Class::GetDirectMethodsPtrUnchecked() {
+inline ArraySlice<ArtMethod> Class::GetDirectMethodsSliceUnchecked(size_t pointer_size) {
+  return ArraySlice<ArtMethod>(GetMethodsPtr(),
+                               GetDirectMethodsStartOffset(),
+                               GetVirtualMethodsStartOffset(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
+}
+
+template<VerifyObjectFlags kVerifyFlags>
+inline ArraySlice<ArtMethod> Class::GetDeclaredMethodsSlice(size_t pointer_size) {
+  DCHECK(IsLoaded() || IsErroneous());
+  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
+  return GetDeclaredMethodsSliceUnchecked(pointer_size);
+}
+
+inline ArraySlice<ArtMethod> Class::GetDeclaredMethodsSliceUnchecked(size_t pointer_size) {
+  return ArraySlice<ArtMethod>(GetMethodsPtr(),
+                               GetDirectMethodsStartOffset(),
+                               GetCopiedMethodsStartOffset(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
+}
+
+template<VerifyObjectFlags kVerifyFlags>
+inline ArraySlice<ArtMethod> Class::GetDeclaredVirtualMethodsSlice(size_t pointer_size) {
+  DCHECK(IsLoaded() || IsErroneous());
+  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
+  return GetDeclaredVirtualMethodsSliceUnchecked(pointer_size);
+}
+
+inline ArraySlice<ArtMethod> Class::GetDeclaredVirtualMethodsSliceUnchecked(size_t pointer_size) {
+  return ArraySlice<ArtMethod>(GetMethodsPtr(),
+                               GetVirtualMethodsStartOffset(),
+                               GetCopiedMethodsStartOffset(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
+}
+
+template<VerifyObjectFlags kVerifyFlags>
+inline ArraySlice<ArtMethod> Class::GetVirtualMethodsSlice(size_t pointer_size) {
+  DCHECK(IsLoaded() || IsErroneous());
+  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
+  return GetVirtualMethodsSliceUnchecked(pointer_size);
+}
+
+inline ArraySlice<ArtMethod> Class::GetVirtualMethodsSliceUnchecked(size_t pointer_size) {
+  LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
+  return ArraySlice<ArtMethod>(methods,
+                               GetVirtualMethodsStartOffset(),
+                               NumMethods(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
+}
+
+template<VerifyObjectFlags kVerifyFlags>
+inline ArraySlice<ArtMethod> Class::GetCopiedMethodsSlice(size_t pointer_size) {
+  DCHECK(IsLoaded() || IsErroneous());
+  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
+  return GetCopiedMethodsSliceUnchecked(pointer_size);
+}
+
+inline ArraySlice<ArtMethod> Class::GetCopiedMethodsSliceUnchecked(size_t pointer_size) {
+  LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
+  return ArraySlice<ArtMethod>(methods,
+                               GetCopiedMethodsStartOffset(),
+                               NumMethods(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
+}
+
+inline LengthPrefixedArray<ArtMethod>* Class::GetMethodsPtr() {
   return reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
-      GetField64(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_)));
+      GetField64(OFFSET_OF_OBJECT_MEMBER(Class, methods_)));
 }
 
-inline LengthPrefixedArray<ArtMethod>* Class::GetVirtualMethodsPtrUnchecked() {
-  return reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
-      GetField64(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_)));
+template<VerifyObjectFlags kVerifyFlags>
+inline ArraySlice<ArtMethod> Class::GetMethodsSlice(size_t pointer_size) {
+  DCHECK(IsLoaded() || IsErroneous());
+  LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
+  return ArraySlice<ArtMethod>(methods,
+                               0,
+                               NumMethods(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
 }
 
-inline void Class::SetDirectMethodsPtr(LengthPrefixedArray<ArtMethod>* new_direct_methods) {
-  DCHECK(GetDirectMethodsPtrUnchecked() == nullptr);
-  SetDirectMethodsPtrUnchecked(new_direct_methods);
-}
 
-inline void Class::SetDirectMethodsPtrUnchecked(
-    LengthPrefixedArray<ArtMethod>* new_direct_methods) {
-  SetField64<false>(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_),
-                    reinterpret_cast<uint64_t>(new_direct_methods));
+inline uint32_t Class::NumMethods() {
+  LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
+  return (methods == nullptr) ? 0 : methods->size();
 }
 
 inline ArtMethod* Class::GetDirectMethodUnchecked(size_t i, size_t pointer_size) {
   CheckPointerSize(pointer_size);
-  auto* methods = GetDirectMethodsPtrUnchecked();
-  DCHECK(methods != nullptr);
-  return &methods->At(i,
-                      ArtMethod::Size(pointer_size),
-                      ArtMethod::Alignment(pointer_size));
+  return &GetDirectMethodsSliceUnchecked(pointer_size).At(i);
 }
 
 inline ArtMethod* Class::GetDirectMethod(size_t i, size_t pointer_size) {
   CheckPointerSize(pointer_size);
-  auto* methods = GetDirectMethodsPtr();
-  DCHECK(methods != nullptr);
-  return &methods->At(i,
-                      ArtMethod::Size(pointer_size),
-                      ArtMethod::Alignment(pointer_size));
+  return &GetDirectMethodsSlice(pointer_size).At(i);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
-inline LengthPrefixedArray<ArtMethod>* Class::GetVirtualMethodsPtr() {
-  DCHECK(IsLoaded<kVerifyFlags>() || IsErroneous<kVerifyFlags>());
-  return GetVirtualMethodsPtrUnchecked();
+inline void Class::SetMethodsPtr(LengthPrefixedArray<ArtMethod>* new_methods,
+                                 uint32_t num_direct,
+                                 uint32_t num_virtual) {
+  DCHECK(GetMethodsPtr() == nullptr);
+  SetMethodsPtrUnchecked(new_methods, num_direct, num_virtual);
 }
 
-inline void Class::SetVirtualMethodsPtr(LengthPrefixedArray<ArtMethod>* new_virtual_methods) {
-  // TODO: we reassign virtual methods to grow the table for miranda
-  // methods.. they should really just be assigned once.
-  SetField64<false>(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_),
-                    reinterpret_cast<uint64_t>(new_virtual_methods));
+inline void Class::SetMethodsPtrUnchecked(LengthPrefixedArray<ArtMethod>* new_methods,
+                                          uint32_t num_direct,
+                                          uint32_t num_virtual) {
+  DCHECK_LE(num_direct + num_virtual, (new_methods == nullptr) ? 0 : new_methods->size());
+  SetMethodsPtrInternal(new_methods);
+  SetFieldShort<false>(OFFSET_OF_OBJECT_MEMBER(Class, copied_methods_offset_),
+                       dchecked_integral_cast<uint16_t>(num_direct + num_virtual));
+  SetFieldShort<false>(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_offset_),
+                       dchecked_integral_cast<uint16_t>(num_direct));
+}
+
+inline void Class::SetMethodsPtrInternal(LengthPrefixedArray<ArtMethod>* new_methods) {
+  SetField64<false>(OFFSET_OF_OBJECT_MEMBER(Class, methods_),
+                    reinterpret_cast<uint64_t>(new_methods));
 }
 
 template<VerifyObjectFlags kVerifyFlags>
@@ -135,11 +223,7 @@
 
 inline ArtMethod* Class::GetVirtualMethodUnchecked(size_t i, size_t pointer_size) {
   CheckPointerSize(pointer_size);
-  LengthPrefixedArray<ArtMethod>* methods = GetVirtualMethodsPtrUnchecked();
-  DCHECK(methods != nullptr);
-  return &methods->At(i,
-                      ArtMethod::Size(pointer_size),
-                      ArtMethod::Alignment(pointer_size));
+  return &GetVirtualMethodsSliceUnchecked(pointer_size).At(i);
 }
 
 inline PointerArray* Class::GetVTable() {
@@ -833,24 +917,42 @@
       CHECK_EQ(field.GetDeclaringClass(), this) << GetStatus();
     }
   }
-  for (ArtMethod& method : GetDirectMethods(pointer_size)) {
-    method.VisitRoots(visitor, pointer_size);
-  }
-  for (ArtMethod& method : GetVirtualMethods(pointer_size)) {
+  for (ArtMethod& method : GetMethods(pointer_size)) {
     method.VisitRoots(visitor, pointer_size);
   }
 }
 
 inline IterationRange<StrideIterator<ArtMethod>> Class::GetDirectMethods(size_t pointer_size) {
   CheckPointerSize(pointer_size);
-  return MakeIterationRangeFromLengthPrefixedArray(GetDirectMethodsPtrUnchecked(),
-                                                   ArtMethod::Size(pointer_size),
-                                                   ArtMethod::Alignment(pointer_size));
+  return GetDirectMethodsSliceUnchecked(pointer_size).AsRange();
+}
+
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetDeclaredMethods(
+      size_t pointer_size) {
+  CheckPointerSize(pointer_size);
+  return GetDeclaredMethodsSliceUnchecked(pointer_size).AsRange();
+}
+
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetDeclaredVirtualMethods(
+      size_t pointer_size) {
+  CheckPointerSize(pointer_size);
+  return GetDeclaredVirtualMethodsSliceUnchecked(pointer_size).AsRange();
 }
 
 inline IterationRange<StrideIterator<ArtMethod>> Class::GetVirtualMethods(size_t pointer_size) {
   CheckPointerSize(pointer_size);
-  return MakeIterationRangeFromLengthPrefixedArray(GetVirtualMethodsPtrUnchecked(),
+  return GetVirtualMethodsSliceUnchecked(pointer_size).AsRange();
+}
+
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetCopiedMethods(size_t pointer_size) {
+  CheckPointerSize(pointer_size);
+  return GetCopiedMethodsSliceUnchecked(pointer_size).AsRange();
+}
+
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetMethods(size_t pointer_size) {
+  CheckPointerSize(pointer_size);
+  return MakeIterationRangeFromLengthPrefixedArray(GetMethodsPtr(),
                                                    ArtMethod::Size(pointer_size),
                                                    ArtMethod::Alignment(pointer_size));
 }
@@ -918,13 +1020,15 @@
 }
 
 inline uint32_t Class::NumDirectMethods() {
-  LengthPrefixedArray<ArtMethod>* arr = GetDirectMethodsPtrUnchecked();
-  return arr != nullptr ? arr->size() : 0u;
+  return GetVirtualMethodsStartOffset();
+}
+
+inline uint32_t Class::NumDeclaredVirtualMethods() {
+  return GetCopiedMethodsStartOffset() - GetVirtualMethodsStartOffset();
 }
 
 inline uint32_t Class::NumVirtualMethods() {
-  LengthPrefixedArray<ArtMethod>* arr = GetVirtualMethodsPtrUnchecked();
-  return arr != nullptr ? arr->size() : 0u;
+  return NumMethods() - GetVirtualMethodsStartOffset();
 }
 
 inline uint32_t Class::NumInstanceFields() {
@@ -952,16 +1056,11 @@
   if (ifields != new_ifields) {
     dest->SetIFieldsPtrUnchecked(new_ifields);
   }
-  // Update direct and virtual method arrays.
-  LengthPrefixedArray<ArtMethod>* direct_methods = GetDirectMethodsPtr();
-  LengthPrefixedArray<ArtMethod>* new_direct_methods = visitor(direct_methods);
-  if (direct_methods != new_direct_methods) {
-    dest->SetDirectMethodsPtrUnchecked(new_direct_methods);
-  }
-  LengthPrefixedArray<ArtMethod>* virtual_methods = GetVirtualMethodsPtr();
-  LengthPrefixedArray<ArtMethod>* new_virtual_methods = visitor(virtual_methods);
-  if (virtual_methods != new_virtual_methods) {
-    dest->SetVirtualMethodsPtr(new_virtual_methods);
+  // Update method array.
+  LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
+  LengthPrefixedArray<ArtMethod>* new_methods = visitor(methods);
+  if (methods != new_methods) {
+    dest->SetMethodsPtrInternal(new_methods);
   }
   // Update dex cache strings.
   GcRoot<mirror::String>* strings = GetDexCacheStrings();
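
A worked example of the new single-array bookkeeping (numbers hypothetical): for a methods_
array of size 10 with virtual_methods_offset_ == 3 and copied_methods_offset_ == 8:

    // methods_ layout: [0, 3) direct | [3, 8) declared virtual | [8, 10) copied
    // NumMethods()                == 10
    // NumDirectMethods()          == virtual_methods_offset_          == 3
    // NumDeclaredVirtualMethods() == copied_offset - virtual_offset   == 5
    // NumVirtualMethods()         == NumMethods() - virtual_offset    == 7  (declared + copied)
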
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 05a9039..66060f2 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -457,6 +457,10 @@
   return nullptr;
 }
 
+// TODO: These should maybe be renamed to FindOwnedVirtualMethod (or similar), because they do
+// not only find 'declared' methods: they will also return copied methods. This behavior is
+// desired and correct, but the naming can lead to confusion because, in the Java language,
+// 'declared' excludes the interface methods these may find.
 ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature,
                                             size_t pointer_size) {
   for (auto& method : GetVirtualMethods(pointer_size)) {
@@ -482,10 +486,8 @@
 ArtMethod* Class::FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
                                             size_t pointer_size) {
   if (GetDexCache() == dex_cache) {
-    for (auto& method : GetVirtualMethods(pointer_size)) {
-      // A miranda method may have a different DexCache and is always created by linking,
-      // never *declared* in the class.
-      if (method.GetDexMethodIndex() == dex_method_idx && !method.IsMiranda()) {
+    for (auto& method : GetDeclaredVirtualMethods(pointer_size)) {
+      if (method.GetDexMethodIndex() == dex_method_idx) {
         return &method;
       }
     }
@@ -725,12 +727,7 @@
 
 void Class::SetPreverifiedFlagOnAllMethods(size_t pointer_size) {
   DCHECK(IsVerified());
-  for (auto& m : GetDirectMethods(pointer_size)) {
-    if (!m.IsNative() && m.IsInvokable()) {
-      m.SetPreverified();
-    }
-  }
-  for (auto& m : GetVirtualMethods(pointer_size)) {
+  for (auto& m : GetMethods(pointer_size)) {
     if (!m.IsNative() && m.IsInvokable()) {
       m.SetPreverified();
     }
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 0ab5b97..489c269 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -44,6 +44,7 @@
 struct ClassOffsets;
 template<class T> class Handle;
 template<typename T> class LengthPrefixedArray;
+template<typename T> class ArraySlice;
 class Signature;
 class StringPiece;
 template<size_t kNumReferences> class PACKED(4) StackHandleScope;
@@ -559,8 +560,8 @@
   // The size of java.lang.Class.class.
   static uint32_t ClassClassSize(size_t pointer_size) {
     // The number of vtable entries in java.lang.Class.
-    uint32_t vtable_entries = Object::kVTableLength + 65;
-    return ComputeClassSize(true, vtable_entries, 0, 0, 0, 1, 0, pointer_size);
+    uint32_t vtable_entries = Object::kVTableLength + 69;
+    return ComputeClassSize(true, vtable_entries, 0, 0, 4, 1, 0, pointer_size);
   }
 
   // The size of a java.lang.Class representing a primitive such as int.class.
@@ -702,12 +703,24 @@
   ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetDirectMethods(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  LengthPrefixedArray<ArtMethod>* GetDirectMethodsPtr() SHARED_REQUIRES(Locks::mutator_lock_);
+  ALWAYS_INLINE LengthPrefixedArray<ArtMethod>* GetMethodsPtr()
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void SetDirectMethodsPtr(LengthPrefixedArray<ArtMethod>* new_direct_methods)
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetMethods(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void SetMethodsPtr(LengthPrefixedArray<ArtMethod>* new_methods,
+                     uint32_t num_direct,
+                     uint32_t num_virtual)
       SHARED_REQUIRES(Locks::mutator_lock_);
   // Used by image writer.
-  void SetDirectMethodsPtrUnchecked(LengthPrefixedArray<ArtMethod>* new_direct_methods)
+  void SetMethodsPtrUnchecked(LengthPrefixedArray<ArtMethod>* new_methods,
+                              uint32_t num_direct,
+                              uint32_t num_virtual)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDirectMethodsSlice(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ALWAYS_INLINE ArtMethod* GetDirectMethod(size_t i, size_t pointer_size)
@@ -723,18 +736,50 @@
   ALWAYS_INLINE uint32_t NumDirectMethods() SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ALWAYS_INLINE LengthPrefixedArray<ArtMethod>* GetVirtualMethodsPtr()
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetMethodsSlice(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredMethodsSlice(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetDeclaredMethods(
+        size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredVirtualMethodsSlice(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetDeclaredVirtualMethods(
+        size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetCopiedMethodsSlice(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetCopiedMethods(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetVirtualMethodsSlice(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetVirtualMethods(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void SetVirtualMethodsPtr(LengthPrefixedArray<ArtMethod>* new_virtual_methods)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Returns the number of non-inherited virtual methods.
+  // Returns the number of non-inherited virtual methods (sum of declared and copied methods).
   ALWAYS_INLINE uint32_t NumVirtualMethods() SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Returns the number of copied virtual methods.
+  ALWAYS_INLINE uint32_t NumCopiedVirtualMethods() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Returns the number of declared virtual methods.
+  ALWAYS_INLINE uint32_t NumDeclaredVirtualMethods() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE uint32_t NumMethods() SHARED_REQUIRES(Locks::mutator_lock_);
+
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ArtMethod* GetVirtualMethod(size_t i, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -1155,10 +1200,19 @@
     return pointer_size;
   }
 
-  ALWAYS_INLINE LengthPrefixedArray<ArtMethod>* GetDirectMethodsPtrUnchecked()
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDirectMethodsSliceUnchecked(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE LengthPrefixedArray<ArtMethod>* GetVirtualMethodsPtrUnchecked()
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetVirtualMethodsSliceUnchecked(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredMethodsSliceUnchecked(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredVirtualMethodsSliceUnchecked(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetCopiedMethodsSliceUnchecked(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Fix up all of the native pointers in the class by running them through the visitor. Only sets
@@ -1169,6 +1223,9 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
+  ALWAYS_INLINE void SetMethodsPtrInternal(LengthPrefixedArray<ArtMethod>* new_methods)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   void SetVerifyError(Object* klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
   template <bool throw_on_failure, bool use_referrers_cache>
@@ -1194,6 +1251,15 @@
   IterationRange<StrideIterator<ArtField>> GetIFieldsUnchecked()
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // The index in the methods_ array where the first declared virtual method is.
+  ALWAYS_INLINE uint32_t GetVirtualMethodsStartOffset() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // The index in the methods_ array where the first direct method is.
+  ALWAYS_INLINE uint32_t GetDirectMethodsStartOffset() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // The index in the methods_ array where the first copied method is.
+  ALWAYS_INLINE uint32_t GetCopiedMethodsStartOffset() SHARED_REQUIRES(Locks::mutator_lock_);
+
   bool ProxyDescriptorEquals(const char* match) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Check that the pointer size matches the one in the class linker.
@@ -1206,6 +1272,11 @@
   void VisitReferences(mirror::Class* klass, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // 'Class' Object Fields
+  // Order governed by java field ordering. See art::ClassLinker::LinkFields.
+
+  HeapReference<Object> annotation_type_;
+
   // Defining class loader, or null for the "bootstrap" system loader.
   HeapReference<ClassLoader> class_loader_;
 
@@ -1251,12 +1322,12 @@
   // virtual_ methods_ for miranda methods.
   HeapReference<PointerArray> vtable_;
 
+  // Access flags; low 16 bits are defined by VM spec.
+  uint32_t access_flags_;
+
   // Short cuts to dex_cache_ member for fast compiled code access.
   uint64_t dex_cache_strings_;
 
-  // static, private, and <init> methods. Pointer to an ArtMethod length-prefixed array.
-  uint64_t direct_methods_;
-
   // instance fields
   //
   // These describe the layout of the contents of an Object.
@@ -1268,16 +1339,24 @@
   // ArtFields.
   uint64_t ifields_;
 
+  // Pointer to an ArtMethod length-prefixed array. All of the methods for which this class is
+  // the place where they are logically defined. This includes all private, static, final, and
+  // virtual methods, as well as inherited default methods and miranda methods.
+  //
+  // The slice methods_ [0, virtual_methods_offset_) are the direct (static, private, init) methods
+  // declared by this class.
+  //
+  // The slice methods_ [virtual_methods_offset_, copied_methods_offset_) are the virtual methods
+  // declared by this class.
+  //
+  // The slice methods_ [copied_methods_offset_, |methods_|) are the methods that are copied from
+  // interfaces, such as miranda and default methods. These are copied for resolution purposes, as
+  // this class is where they are (logically) declared as far as virtual dispatch is concerned.
+  uint64_t methods_;
+
   // Static fields length-prefixed array.
   uint64_t sfields_;
 
-  // Virtual methods defined in this class; invoked through vtable. Pointer to an ArtMethod
-  // length-prefixed array.
-  uint64_t virtual_methods_;
-
-  // Access flags; low 16 bits are defined by VM spec.
-  uint32_t access_flags_;
-
   // Class flags to help speed up visiting object references.
   uint32_t class_flags_;
 
@@ -1317,6 +1396,14 @@
   // State of class initialization.
   Status status_;
 
+  // The offset of the first virtual method that is copied from an interface. This includes
+  // miranda, default, and default-conflict methods. Having a hard limit of ((1 << 16) - 1)
+  // methods defined on a single class is well established in Java, so we use only uint16_t's
+  // here.
+  uint16_t copied_methods_offset_;
+
+  // The offset of the first declared virtual method in the methods_ array.
+  uint16_t virtual_methods_offset_;
+
   // TODO: ?
   // initiating class loader list
   // NOTE: for classes with low serialNumber, these are unused, and the
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 5c12091..4603428 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -163,6 +163,7 @@
 #endif
 }
 
+template<bool kCasRelease>
 inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr) {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
@@ -181,10 +182,13 @@
         static_cast<uint32_t>(reinterpret_cast<uintptr_t>(expected_rb_ptr)));
     new_lw = lw;
     new_lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
-    // This CAS is a CAS release so that when GC updates all the fields of an object and then
-    // changes the object from gray to black, the field updates (stores) will be visible (won't be
-    // reordered after this CAS.)
-  } while (!CasLockWordWeakRelease(expected_lw, new_lw));
+    // ConcurrentCopying::ProcessMarkStackRef uses this with kCasRelease == true.
+    // If kCasRelease == true, use a CAS release so that when GC updates all the fields of
+    // an object and then changes the object from gray to black, the field updates (stores) will be
+    // visible (won't be reordered after this CAS.)
+  } while (!(kCasRelease ?
+             CasLockWordWeakRelease(expected_lw, new_lw) :
+             CasLockWordWeakRelaxed(expected_lw, new_lw)));
   return true;
 #elif USE_BROOKS_READ_BARRIER
   DCHECK(kUseBrooksReadBarrier);
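
The kCasRelease switch above maps directly onto C++ memory orders; a standalone sketch of the
same choice using std::atomic (illustrative only, not the ART Atomic wrapper):

    #include <atomic>
    #include <cstdint>

    template <bool kCasRelease>
    bool CasWord(std::atomic<uint32_t>& word, uint32_t expected, uint32_t desired) {
      // A release CAS makes this thread's earlier stores visible to any thread that
      // later observes `desired`; a relaxed CAS promises no such ordering.
      return word.compare_exchange_weak(
          expected, desired,
          kCasRelease ? std::memory_order_release : std::memory_order_relaxed);
    }
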
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 5c6520f..71e704e 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -92,13 +92,13 @@
   void SetClass(Class* new_klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
   Object* GetReadBarrierPointer() SHARED_REQUIRES(Locks::mutator_lock_);
+
 #ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
   NO_RETURN
 #endif
   void SetReadBarrierPointer(Object* rb_ptr) SHARED_REQUIRES(Locks::mutator_lock_);
-#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
-  NO_RETURN
-#endif
+
+  template<bool kCasRelease = false>
   ALWAYS_INLINE bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void AssertReadBarrierPointer() const SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/mirror/reference-inl.h b/runtime/mirror/reference-inl.h
index 01e99b9..bd4a9c1 100644
--- a/runtime/mirror/reference-inl.h
+++ b/runtime/mirror/reference-inl.h
@@ -23,7 +23,7 @@
 namespace mirror {
 
 inline uint32_t Reference::ClassSize(size_t pointer_size) {
-  uint32_t vtable_entries = Object::kVTableLength + 5;
+  uint32_t vtable_entries = Object::kVTableLength + 4;
   return Class::ComputeClassSize(false, vtable_entries, 2, 0, 0, 0, 0, pointer_size);
 }
 
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index 28a830d..cdf468c 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -33,8 +33,8 @@
 namespace mirror {
 
 inline uint32_t String::ClassSize(size_t pointer_size) {
-  uint32_t vtable_entries = Object::kVTableLength + 52;
-  return Class::ComputeClassSize(true, vtable_entries, 0, 1, 0, 1, 2, pointer_size);
+  uint32_t vtable_entries = Object::kVTableLength + 53;
+  return Class::ComputeClassSize(true, vtable_entries, 0, 2, 0, 1, 2, pointer_size);
 }
 
 // Sets string count in the allocation code path to ensure it is guarded by a CAS.
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index be869d4..33aca03 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -109,12 +109,17 @@
 
 String* String::AllocFromModifiedUtf8(Thread* self, const char* utf) {
   DCHECK(utf != nullptr);
-  size_t char_count = CountModifiedUtf8Chars(utf);
-  return AllocFromModifiedUtf8(self, char_count, utf);
+  size_t byte_count = strlen(utf);
+  size_t char_count = CountModifiedUtf8Chars(utf, byte_count);
+  return AllocFromModifiedUtf8(self, char_count, utf, byte_count);
+}
+
+String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
+                                      const char* utf8_data_in) {
+  return AllocFromModifiedUtf8(self, utf16_length, utf8_data_in, strlen(utf8_data_in));
 }
 
 String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
-                                      const char* utf8_data_in) {
+                                      const char* utf8_data_in, int32_t utf8_length) {
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
   SetStringCountVisitor visitor(utf16_length);
   String* string = Alloc<true>(self, utf16_length, allocator_type, visitor);
@@ -122,7 +127,7 @@
     return nullptr;
   }
   uint16_t* utf16_data_out = string->GetValue();
-  ConvertModifiedUtf8ToUtf16(utf16_data_out, utf8_data_in);
+  ConvertModifiedUtf8ToUtf16(utf16_data_out, utf16_length, utf8_data_in, utf8_length);
   return string;
 }
 
@@ -217,7 +222,7 @@
   const uint16_t* chars = GetValue();
   size_t byte_count = GetUtfLength();
   std::string result(byte_count, static_cast<char>(0));
-  ConvertUtf16ToModifiedUtf8(&result[0], chars, GetLength());
+  ConvertUtf16ToModifiedUtf8(&result[0], byte_count, chars, GetLength());
   return result;
 }
 
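
The changes above thread the exact Modified-UTF-8 byte count to the converters instead of having
them recompute it. A concrete case showing why utf16_length alone is not enough:

    // "a" + U+0000 + U+03B2: utf16_length == 3, but in Modified UTF-8:
    //   'a'    -> 1 byte
    //   U+0000 -> 2 bytes (0xC0 0x80; Modified UTF-8 never emits a raw NUL)
    //   U+03B2 -> 2 bytes
    // so byte_count == 5, which CountUtf8Bytes() computes up front.
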
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 80ebd2c..e2cfb8d 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -116,6 +116,10 @@
   static String* AllocFromModifiedUtf8(Thread* self, const char* utf)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
+  static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
+                                       const char* utf8_data_in, int32_t utf8_length)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+
   static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index e215994..f068b3e 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -56,9 +56,9 @@
 void Throwable::SetStackState(Object* state) SHARED_REQUIRES(Locks::mutator_lock_) {
   CHECK(state != nullptr);
   if (Runtime::Current()->IsActiveTransaction()) {
-    SetFieldObjectVolatile<true>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_state_), state);
+    SetFieldObjectVolatile<true>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_), state);
   } else {
-    SetFieldObjectVolatile<false>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_state_), state);
+    SetFieldObjectVolatile<false>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_), state);
   }
 }
 
diff --git a/runtime/mirror/throwable.h b/runtime/mirror/throwable.h
index 0f488dc..6aacc8d 100644
--- a/runtime/mirror/throwable.h
+++ b/runtime/mirror/throwable.h
@@ -60,16 +60,16 @@
 
  private:
   Object* GetStackState() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_state_));
+    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
   }
   Object* GetStackTrace() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_trace_));
+    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
   }
 
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
+  HeapReference<Object> backtrace_;  // Note: this field is volatile on the Java side.
   HeapReference<Throwable> cause_;
   HeapReference<String> detail_message_;
-  HeapReference<Object> stack_state_;  // Note this is Java volatile:
   HeapReference<Object> stack_trace_;
   HeapReference<Object> suppressed_exceptions_;
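
With this change both GetStackState and GetStackTrace resolve to the single backtrace_ slot, matching the OpenJDK Throwable layout in which one field carries the captured backtrace, and the C++ field list stays alphabetical to satisfy ValidateFieldOrderOfJavaCppUnionClasses. A sketch of the read side, written as a hypothetical member so the private field is reachable:

    // The Java field is volatile, so reads pair with the volatile store
    // performed by SetStackState().
    Object* Throwable::GetBacktraceSketch() {
      return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
    }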
 
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 4cd3c3d..da6cf1f 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -155,7 +155,9 @@
                                          jstring javaOutputName,
                                          jint flags ATTRIBUTE_UNUSED,
                                          // class_loader will be used for app images.
-                                         jobject class_loader ATTRIBUTE_UNUSED) {
+                                         jobject class_loader ATTRIBUTE_UNUSED,
+                                         // dex_elements will be used for app images.
+                                         jobject dex_elements ATTRIBUTE_UNUSED) {
   ScopedUtfChars sourceName(env, javaSourceName);
   if (sourceName.c_str() == nullptr) {
     return 0;
@@ -445,7 +447,12 @@
   NATIVE_METHOD(DexFile, getDexOptNeeded,
                 "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)I"),
   NATIVE_METHOD(DexFile, openDexFileNative,
-                "(Ljava/lang/String;Ljava/lang/String;ILjava/lang/ClassLoader;)Ljava/lang/Object;"),
+                "(Ljava/lang/String;"
+                "Ljava/lang/String;"
+                "I"
+                "Ljava/lang/ClassLoader;"
+                "[Ldalvik/system/DexPathList$Element;"
+                ")Ljava/lang/Object;"),
 };
 
 void register_dalvik_system_DexFile(JNIEnv* env) {
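
Splitting the descriptor across adjacent string literals changes nothing at runtime; the compiler concatenates them into one signature, now with the extra [Ldalvik/system/DexPathList$Element; parameter matching the added dex_elements argument. A self-checking sketch of that equivalence:

    // Adjacent literals concatenate at translation time.
    constexpr char kSplit[] =
        "(Ljava/lang/String;"
        "Ljava/lang/String;"
        "I"
        "Ljava/lang/ClassLoader;"
        "[Ldalvik/system/DexPathList$Element;"
        ")Ljava/lang/Object;";
    constexpr char kOneLine[] =
        "(Ljava/lang/String;Ljava/lang/String;ILjava/lang/ClassLoader;"
        "[Ldalvik/system/DexPathList$Element;)Ljava/lang/Object;";
    static_assert(sizeof(kSplit) == sizeof(kOneLine), "split and one-line forms must match");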
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 4c5dc3a..424cc11 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -562,18 +562,29 @@
 
 /*
  * This is called by the framework when it knows the application directory and
- * process name.  We use this information to start up the sampling profiler for
- * for ART.
+ * process name.
  */
-static void VMRuntime_registerAppInfo(JNIEnv* env, jclass, jstring pkgName,
-                                      jstring appDir ATTRIBUTE_UNUSED,
-                                      jstring procName ATTRIBUTE_UNUSED) {
-  const char *pkgNameChars = env->GetStringUTFChars(pkgName, nullptr);
-  std::string profileFile = StringPrintf("/data/dalvik-cache/profiles/%s", pkgNameChars);
+static void VMRuntime_registerAppInfo(JNIEnv* env,
+                                      jclass clazz ATTRIBUTE_UNUSED,
+                                      jstring pkg_name,
+                                      jstring app_dir,
+                                      jobjectArray code_paths) {
+  std::vector<std::string> code_paths_vec;
+  int code_paths_length = env->GetArrayLength(code_paths);
+  for (int i = 0; i < code_paths_length; i++) {
+    jstring code_path = reinterpret_cast<jstring>(env->GetObjectArrayElement(code_paths, i));
+    const char* raw_code_path = env->GetStringUTFChars(code_path, nullptr);
+    code_paths_vec.push_back(raw_code_path);
+    env->ReleaseStringUTFChars(code_path, raw_code_path);
+  }
 
-  Runtime::Current()->StartProfiler(profileFile.c_str());
+  const char* raw_app_dir = env->GetStringUTFChars(app_dir, nullptr);
+  const char* raw_pkg_name = env->GetStringUTFChars(pkg_name, nullptr);
+  std::string profile_file = StringPrintf("%s/code_cache/%s.prof", raw_app_dir, raw_pkg_name);
+  env->ReleaseStringUTFChars(pkg_name, raw_pkg_name);
+  env->ReleaseStringUTFChars(app_dir, raw_app_dir);
 
-  env->ReleaseStringUTFChars(pkgName, pkgNameChars);
+  Runtime::Current()->RegisterAppInfo(code_paths_vec, profile_file);
 }
 
 static jboolean VMRuntime_isBootClassPathOnDisk(JNIEnv* env, jclass, jstring java_instruction_set) {
@@ -630,7 +641,7 @@
   NATIVE_METHOD(VMRuntime, isCheckJniEnabled, "!()Z"),
   NATIVE_METHOD(VMRuntime, preloadDexCaches, "()V"),
   NATIVE_METHOD(VMRuntime, registerAppInfo,
-                "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V"),
+                "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;)V"),
   NATIVE_METHOD(VMRuntime, isBootClassPathOnDisk, "(Ljava/lang/String;)Z"),
   NATIVE_METHOD(VMRuntime, getCurrentInstructionSet, "()Ljava/lang/String;"),
 };
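
The rewritten registerAppInfo derives the profile file from the app's own code_cache directory instead of the global dalvik-cache path, then hands the code paths to Runtime::RegisterAppInfo. A small sketch of the path construction; the package name is illustrative:

    // Mirrors the StringPrintf format used in VMRuntime_registerAppInfo.
    std::string ProfilePathSketch(const std::string& app_dir, const std::string& pkg_name) {
      return StringPrintf("%s/code_cache/%s.prof", app_dir.c_str(), pkg_name.c_str());
    }
    // ProfilePathSketch("/data/user/0/com.example", "com.example")
    //   == "/data/user/0/com.example/code_cache/com.example.prof"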
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index ae1a4d7..67d825e 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -129,7 +129,11 @@
   return reinterpret_cast<jlong>(ThreadForEnv(env));
 }
 
-static void ZygoteHooks_nativePostForkChild(JNIEnv* env, jclass, jlong token, jint debug_flags,
+static void ZygoteHooks_nativePostForkChild(JNIEnv* env,
+                                            jclass,
+                                            jlong token,
+                                            jint debug_flags,
+                                            jboolean is_system_server,
                                             jstring instruction_set) {
   Thread* thread = reinterpret_cast<Thread*>(token);
   // Our system thread ID, etc, has changed so reset Thread state.
@@ -174,22 +178,24 @@
     }
   }
 
-  if (instruction_set != nullptr) {
+  if (instruction_set != nullptr && !is_system_server) {
     ScopedUtfChars isa_string(env, instruction_set);
     InstructionSet isa = GetInstructionSetFromString(isa_string.c_str());
     Runtime::NativeBridgeAction action = Runtime::NativeBridgeAction::kUnload;
     if (isa != kNone && isa != kRuntimeISA) {
       action = Runtime::NativeBridgeAction::kInitialize;
     }
-    Runtime::Current()->InitNonZygoteOrPostFork(env, action, isa_string.c_str());
+    Runtime::Current()->InitNonZygoteOrPostFork(
+        env, is_system_server, action, isa_string.c_str());
   } else {
-    Runtime::Current()->InitNonZygoteOrPostFork(env, Runtime::NativeBridgeAction::kUnload, nullptr);
+    Runtime::Current()->InitNonZygoteOrPostFork(
+        env, is_system_server, Runtime::NativeBridgeAction::kUnload, nullptr);
   }
 }
 
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(ZygoteHooks, nativePreFork, "()J"),
-  NATIVE_METHOD(ZygoteHooks, nativePostForkChild, "(JILjava/lang/String;)V"),
+  NATIVE_METHOD(ZygoteHooks, nativePostForkChild, "(JIZLjava/lang/String;)V"),
 };
 
 void register_dalvik_system_ZygoteHooks(JNIEnv* env) {
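
The Z added to the descriptor is the new is_system_server flag; JNI requires the descriptor's parameter order to match the native signature exactly. A compile-time sketch of the correspondence:

    // "(JIZLjava/lang/String;)V" maps, in order, to:
    //   J -> jlong token, I -> jint debug_flags,
    //   Z -> jboolean is_system_server (new), Ljava/lang/String; -> jstring.
    using PostForkChildFn = void (*)(JNIEnv*, jclass, jlong, jint, jboolean, jstring);
    constexpr PostForkChildFn kSignatureCheck = &ZygoteHooks_nativePostForkChild;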
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 5e42392..1977481 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -439,16 +439,9 @@
   StackHandleScope<2> hs(soa.Self());
   Handle<mirror::Class> klass = hs.NewHandle(DecodeClass(soa, javaThis));
   size_t num_methods = 0;
-  for (auto& m : klass->GetVirtualMethods(sizeof(void*))) {
+  for (auto& m : klass->GetDeclaredMethods(sizeof(void*))) {
     auto modifiers = m.GetAccessFlags();
-    if ((publicOnly == JNI_FALSE || (modifiers & kAccPublic) != 0) &&
-        (modifiers & kAccMiranda) == 0) {
-      ++num_methods;
-    }
-  }
-  for (auto& m : klass->GetDirectMethods(sizeof(void*))) {
-    auto modifiers = m.GetAccessFlags();
-    // Add non-constructor direct/static methods.
+    // Add non-constructor declared methods.
     if ((publicOnly == JNI_FALSE || (modifiers & kAccPublic) != 0) &&
         (modifiers & kAccConstructor) == 0) {
       ++num_methods;
@@ -457,22 +450,9 @@
   auto ret = hs.NewHandle(mirror::ObjectArray<mirror::Method>::Alloc(
       soa.Self(), mirror::Method::ArrayClass(), num_methods));
   num_methods = 0;
-  for (auto& m : klass->GetVirtualMethods(sizeof(void*))) {
+  for (auto& m : klass->GetDeclaredMethods(sizeof(void*))) {
     auto modifiers = m.GetAccessFlags();
     if ((publicOnly == JNI_FALSE || (modifiers & kAccPublic) != 0) &&
-        (modifiers & kAccMiranda) == 0) {
-      auto* method = mirror::Method::CreateFromArtMethod(soa.Self(), &m);
-      if (method == nullptr) {
-        soa.Self()->AssertPendingException();
-        return nullptr;
-      }
-      ret->SetWithoutChecks<false>(num_methods++, method);
-    }
-  }
-  for (auto& m : klass->GetDirectMethods(sizeof(void*))) {
-    auto modifiers = m.GetAccessFlags();
-    // Add non-constructor direct/static methods.
-    if ((publicOnly == JNI_FALSE || (modifiers & kAccPublic) != 0) &&
         (modifiers & kAccConstructor) == 0) {
       auto* method = mirror::Method::CreateFromArtMethod(soa.Self(), &m);
       if (method == nullptr) {
@@ -673,7 +653,8 @@
     }
   }
   auto* constructor = klass->GetDeclaredConstructor(
-      soa.Self(), NullHandle<mirror::ObjectArray<mirror::Class>>());
+      soa.Self(),
+      ScopedNullHandle<mirror::ObjectArray<mirror::Class>>());
   if (UNLIKELY(constructor == nullptr)) {
     soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
                                    "%s has no zero argument constructor",
diff --git a/runtime/native/java_lang_Runtime.cc b/runtime/native/java_lang_Runtime.cc
index 856a3e7..f42a17d 100644
--- a/runtime/native/java_lang_Runtime.cc
+++ b/runtime/native/java_lang_Runtime.cc
@@ -52,10 +52,10 @@
   exit(status);
 }
 
-static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPathJstr) {
+static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPath) {
 #ifdef __ANDROID__
-  if (javaLdLibraryPathJstr != nullptr) {
-    ScopedUtfChars ldLibraryPath(env, javaLdLibraryPathJstr);
+  if (javaLdLibraryPath != nullptr) {
+    ScopedUtfChars ldLibraryPath(env, javaLdLibraryPath);
     if (ldLibraryPath.c_str() != nullptr) {
       android_update_LD_LIBRARY_PATH(ldLibraryPath.c_str());
     }
@@ -63,23 +63,37 @@
 
 #else
   LOG(WARNING) << "android_update_LD_LIBRARY_PATH not found; .so dependencies will not work!";
-  UNUSED(javaLdLibraryPathJstr, env);
+  UNUSED(javaLdLibraryPath, env);
 #endif
 }
 
 static jstring Runtime_nativeLoad(JNIEnv* env, jclass, jstring javaFilename, jobject javaLoader,
-                                  jstring javaLdLibraryPathJstr) {
+                                  jboolean isSharedNamespace, jstring javaLibrarySearchPath,
+                                  jstring javaLibraryPermittedPath) {
   ScopedUtfChars filename(env, javaFilename);
   if (filename.c_str() == nullptr) {
     return nullptr;
   }
 
-  SetLdLibraryPath(env, javaLdLibraryPathJstr);
+  int32_t target_sdk_version = Runtime::Current()->GetTargetSdkVersion();
+
+  // Starting with N, nativeLoad uses a classloader-local linker namespace
+  // instead of the global LD_LIBRARY_PATH (SDK 23 is Marshmallow). The
+  // INT_MAX bound below is intentionally always true for now, so the legacy
+  // path is still set for every target SDK version.
+  if (target_sdk_version <= INT_MAX) {
+    SetLdLibraryPath(env, javaLibrarySearchPath);
+  }
 
   std::string error_msg;
   {
     JavaVMExt* vm = Runtime::Current()->GetJavaVM();
-    bool success = vm->LoadNativeLibrary(env, filename.c_str(), javaLoader, &error_msg);
+    bool success = vm->LoadNativeLibrary(env,
+                                         filename.c_str(),
+                                         javaLoader,
+                                         isSharedNamespace == JNI_TRUE,
+                                         javaLibrarySearchPath,
+                                         javaLibraryPermittedPath,
+                                         &error_msg);
     if (success) {
       return nullptr;
     }
@@ -107,7 +121,7 @@
   NATIVE_METHOD(Runtime, gc, "()V"),
   NATIVE_METHOD(Runtime, maxMemory, "!()J"),
   NATIVE_METHOD(Runtime, nativeExit, "(I)V"),
-  NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;"),
+  NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;ZLjava/lang/String;Ljava/lang/String;)Ljava/lang/String;"),
   NATIVE_METHOD(Runtime, totalMemory, "!()J"),
 };
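
nativeLoad now forwards the namespace inputs to JavaVMExt::LoadNativeLibrary: a shared-namespace flag plus separate search and permitted paths, replacing the single LD_LIBRARY_PATH string. A call sketch under those assumptions; the library path is illustrative:

    static bool LoadLibSketch(JNIEnv* env, jobject loader,
                              jstring search_path, jstring permitted_path) {
      std::string error_msg;
      JavaVMExt* vm = Runtime::Current()->GetJavaVM();
      // Same call shape as the expanded Runtime_nativeLoad above.
      return vm->LoadNativeLibrary(env, "/system/lib64/libfoo.so", loader,
                                   /* is_shared_namespace */ false,
                                   search_path, permitted_path, &error_msg);
    }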
 
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index c75ff78..13edd67 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -89,7 +89,7 @@
     case kWaitingInMainSignalCatcherLoop: return kJavaWaiting;
     case kWaitingForMethodTracingStart:   return kJavaWaiting;
     case kWaitingForVisitObjects:         return kJavaWaiting;
-    case kWaitingWeakGcRootRead:          return kJavaWaiting;
+    case kWaitingWeakGcRootRead:          return kJavaRunnable;
     case kWaitingForGcThreadFlip:         return kJavaWaiting;
     case kSuspended:                      return kJavaRunnable;
     // Don't add a 'default' here so the compiler can spot incompatible enum changes.
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index 45b9484..ddcaade 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -86,7 +86,7 @@
  * with an interface, array, or primitive class. If this is coming from
  * native, it is OK to avoid access checks since JNI does not enforce them.
  */
-static jobject Constructor_newInstance(JNIEnv* env, jobject javaMethod, jobjectArray javaArgs) {
+static jobject Constructor_newInstance0(JNIEnv* env, jobject javaMethod, jobjectArray javaArgs) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Constructor* m = soa.Decode<mirror::Constructor*>(javaMethod);
   StackHandleScope<1> hs(soa.Self());
@@ -99,7 +99,9 @@
   }
   // Verify that we can access the class.
   if (!m->IsAccessible() && !c->IsPublic()) {
-    auto* caller = GetCallingClass(soa.Self(), 1);
+    // Go 2 frames back; this native method is bound to newInstance0, which is
+    // called from Constructor.newInstance(Object... args).
+    auto* caller = GetCallingClass(soa.Self(), 2);
     // If caller is null, then we called from JNI, just avoid the check since JNI avoids most
     // access checks anyways. TODO: Investigate if this the correct behavior.
     if (caller != nullptr && !caller->CanAccess(c.Get())) {
@@ -127,7 +129,7 @@
 
   // String constructor is replaced by a StringFactory method in InvokeMethod.
   if (c->IsStringClass()) {
-    return InvokeMethod(soa, javaMethod, nullptr, javaArgs, 1);
+    return InvokeMethod(soa, javaMethod, nullptr, javaArgs, 2);
   }
 
   mirror::Object* receiver =
@@ -136,11 +138,18 @@
     return nullptr;
   }
   jobject javaReceiver = soa.AddLocalReference<jobject>(receiver);
-  InvokeMethod(soa, javaMethod, javaReceiver, javaArgs, 1);
+  InvokeMethod(soa, javaMethod, javaReceiver, javaArgs, 2);
   // Constructors are ()V methods, so we shouldn't touch the result of InvokeMethod.
   return javaReceiver;
 }
 
+static jobject Constructor_newInstanceFromSerialization(JNIEnv* env, jclass unused ATTRIBUTE_UNUSED,
+                                                        jclass ctorClass, jclass allocClass) {
+    jmethodID ctor = env->GetMethodID(ctorClass, "<init>", "()V");
+    DCHECK(ctor != nullptr);
+    return env->NewObject(allocClass, ctor);
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(Constructor, getAnnotationNative,
                 "!(Ljava/lang/Class;)Ljava/lang/annotation/Annotation;"),
@@ -149,7 +158,8 @@
   NATIVE_METHOD(Constructor, getParameterAnnotationsNative,
                 "!()[[Ljava/lang/annotation/Annotation;"),
   NATIVE_METHOD(Constructor, isAnnotationPresentNative, "!(Ljava/lang/Class;)Z"),
-  NATIVE_METHOD(Constructor, newInstance, "!([Ljava/lang/Object;)Ljava/lang/Object;"),
+  NATIVE_METHOD(Constructor, newInstance0, "!([Ljava/lang/Object;)Ljava/lang/Object;"),
+  NATIVE_METHOD(Constructor, newInstanceFromSerialization, "!(Ljava/lang/Class;Ljava/lang/Class;)Ljava/lang/Object;"),
 };
 
 void register_java_lang_reflect_Constructor(JNIEnv* env) {
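
The rename to newInstance0 puts one more Java frame between user code and the native implementation, which is why GetCallingClass and InvokeMethod now skip two frames instead of one. The assumed stack, sketched:

    static mirror::Class* CallerOfNewInstanceSketch(Thread* self)
        SHARED_REQUIRES(Locks::mutator_lock_) {
      // Frames above this native code (Java side assumed):
      //   Constructor.newInstance0(Object...)  <- bound to the native method
      //   Constructor.newInstance(Object...)   <- public wrapper
      //   <user code whose access must be checked>
      return GetCallingClass(self, 2);  // skip the two reflection frames
    }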
diff --git a/runtime/native/java_lang_reflect_Method.cc b/runtime/native/java_lang_reflect_Method.cc
index caacba6..d7cf62e 100644
--- a/runtime/native/java_lang_reflect_Method.cc
+++ b/runtime/native/java_lang_reflect_Method.cc
@@ -71,7 +71,7 @@
     mirror::Class* klass = method->GetDeclaringClass();
     int throws_index = -1;
     size_t i = 0;
-    for (const auto& m : klass->GetVirtualMethods(sizeof(void*))) {
+    for (const auto& m : klass->GetDeclaredVirtualMethods(sizeof(void*))) {
       if (&m == method) {
         throws_index = i;
         break;
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index 770644c..8a2c7e4 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -15,7 +15,7 @@
  */
 
 #include "sun_misc_Unsafe.h"
-
+#include "common_throws.h"
 #include "gc/accounting/card_table-inl.h"
 #include "jni_internal.h"
 #include "mirror/array.h"
@@ -23,6 +23,10 @@
 #include "mirror/object-inl.h"
 #include "scoped_fast_native_object_access.h"
 
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+
 namespace art {
 
 static jboolean Unsafe_compareAndSwapInt(JNIEnv* env, jobject, jobject javaObj, jlong offset,
@@ -52,6 +56,17 @@
   mirror::Object* expectedValue = soa.Decode<mirror::Object*>(javaExpectedValue);
   mirror::Object* newValue = soa.Decode<mirror::Object*>(javaNewValue);
   // JNI must use non transactional mode.
+  if (kUseReadBarrier) {
+    // Need to make sure the reference stored in the field is a to-space one before attempting the
+    // CAS or the CAS could fail incorrectly.
+    mirror::HeapReference<mirror::Object>* field_addr =
+        reinterpret_cast<mirror::HeapReference<mirror::Object>*>(
+            reinterpret_cast<uint8_t*>(obj) + static_cast<size_t>(offset));
+    ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, /*kAlwaysUpdateField*/true>(
+        obj,
+        MemberOffset(offset),
+        field_addr);
+  }
   bool success = obj->CasFieldStrongSequentiallyConsistentObject<false>(MemberOffset(offset),
                                                                         expectedValue, newValue);
   return success ? JNI_TRUE : JNI_FALSE;
@@ -185,6 +200,279 @@
   return Primitive::ComponentSize(primitive_type);
 }
 
+static jint Unsafe_addressSize(JNIEnv* env ATTRIBUTE_UNUSED, jobject ob ATTRIBUTE_UNUSED) {
+  return sizeof(void*);
+}
+
+static jint Unsafe_pageSize(JNIEnv* env ATTRIBUTE_UNUSED, jobject ob ATTRIBUTE_UNUSED) {
+  return sysconf(_SC_PAGESIZE);
+}
+
+static jlong Unsafe_allocateMemory(JNIEnv* env, jobject, jlong bytes) {
+  ScopedFastNativeObjectAccess soa(env);
+  // bytes is nonnegative and fits into size_t
+  if (bytes < 0 || bytes != (jlong)(size_t) bytes) {
+    ThrowIllegalAccessException("wrong number of bytes");
+    return 0;
+  }
+  void* mem = malloc(bytes);
+  if (mem == nullptr) {
+    soa.Self()->ThrowOutOfMemoryError("native alloc");
+    return 0;
+  }
+  return (uintptr_t) mem;
+}
+
+static void Unsafe_freeMemory(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  free(reinterpret_cast<void*>(static_cast<uintptr_t>(address)));
+}
+
+static void Unsafe_setMemory(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jlong bytes, jbyte value) {
+  memset(reinterpret_cast<void*>(static_cast<uintptr_t>(address)), value, bytes);
+}
+
+static jbyte Unsafe_getByte$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jbyte*>(address);
+}
+
+static void Unsafe_putByte$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jbyte value) {
+  *reinterpret_cast<jbyte*>(address) = value;
+}
+
+static jshort Unsafe_getShort$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jshort*>(address);
+}
+
+static void Unsafe_putShort$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jshort value) {
+  *reinterpret_cast<jshort*>(address) = value;
+}
+
+static jchar Unsafe_getChar$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jchar*>(address);
+}
+
+static void Unsafe_putChar$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jchar value) {
+  *reinterpret_cast<jchar*>(address) = value;
+}
+
+static jint Unsafe_getInt$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jint*>(address);
+}
+
+static void Unsafe_putInt$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jint value) {
+  *reinterpret_cast<jint*>(address) = value;
+}
+
+static jlong Unsafe_getLong$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jlong*>(address);
+}
+
+static void Unsafe_putLong$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jlong value) {
+  *reinterpret_cast<jlong*>(address) = value;
+}
+
+static jfloat Unsafe_getFloat$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jfloat*>(address);
+}
+
+static void Unsafe_putFloat$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jfloat value) {
+  *reinterpret_cast<jfloat*>(address) = value;
+}
+
+static jdouble Unsafe_getDouble$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address) {
+  return *reinterpret_cast<jdouble*>(address);
+}
+
+static void Unsafe_putDouble$(JNIEnv* env ATTRIBUTE_UNUSED, jobject, jlong address, jdouble value) {
+  *reinterpret_cast<jdouble*>(address) = value;
+}
+
+static void Unsafe_copyMemory(JNIEnv *env, jobject unsafe ATTRIBUTE_UNUSED, jlong src,
+                              jlong dst, jlong size) {
+    if (size == 0) {
+        return;
+    }
+    // size is nonnegative and fits into size_t
+    if (size < 0 || size != (jlong)(size_t) size) {
+        ScopedFastNativeObjectAccess soa(env);
+        ThrowIllegalAccessException("wrong number of bytes");
+        // Bail out instead of falling through to memcpy with a bogus size.
+        return;
+    }
+    size_t sz = (size_t)size;
+    memcpy(reinterpret_cast<void *>(dst), reinterpret_cast<void *>(src), sz);
+}
+
+template<typename T>
+static void copyToArray(jlong srcAddr, mirror::PrimitiveArray<T>* array,
+                        size_t array_offset,
+                        size_t size)
+        SHARED_REQUIRES(Locks::mutator_lock_) {
+    const T* src = reinterpret_cast<T*>(srcAddr);
+    size_t sz = size / sizeof(T);
+    size_t of = array_offset / sizeof(T);
+    for (size_t i = 0; i < sz; ++i) {
+        array->Set(i + of, *(src + i));
+    }
+}
+
+template<typename T>
+static void copyFromArray(jlong dstAddr, mirror::PrimitiveArray<T>* array,
+                          size_t array_offset,
+                          size_t size)
+        SHARED_REQUIRES(Locks::mutator_lock_) {
+    T* dst = reinterpret_cast<T*>(dstAddr);
+    size_t sz = size / sizeof(T);
+    size_t of = array_offset / sizeof(T);
+    for (size_t i = 0; i < sz; ++i) {
+        *(dst + i) = array->Get(i + of);
+    }
+}
+
+static void Unsafe_copyMemoryToPrimitiveArray(JNIEnv *env,
+                                              jobject unsafe ATTRIBUTE_UNUSED,
+                                              jlong srcAddr,
+                                              jobject dstObj,
+                                              jlong dstOffset,
+                                              jlong size) {
+    ScopedObjectAccess soa(env);
+    if (size == 0) {
+        return;
+    }
+    // size is nonnegative and fits into size_t
+    if (size < 0 || size != (jlong)(size_t) size) {
+        ThrowIllegalAccessException("wrong number of bytes");
+        // Bail out instead of copying with a bogus size.
+        return;
+    }
+    size_t sz = (size_t)size;
+    size_t dst_offset = (size_t)dstOffset;
+    mirror::Object* dst = soa.Decode<mirror::Object*>(dstObj);
+    mirror::Class* component_type = dst->GetClass()->GetComponentType();
+    if (component_type->IsPrimitiveByte() || component_type->IsPrimitiveBoolean()) {
+        copyToArray(srcAddr, dst->AsByteSizedArray(), dst_offset, sz);
+    } else if (component_type->IsPrimitiveShort() || component_type->IsPrimitiveChar()) {
+        copyToArray(srcAddr, dst->AsShortSizedArray(), dst_offset, sz);
+    } else if (component_type->IsPrimitiveInt() || component_type->IsPrimitiveFloat()) {
+        copyToArray(srcAddr, dst->AsIntArray(), dst_offset, sz);
+    } else if (component_type->IsPrimitiveLong() || component_type->IsPrimitiveDouble()) {
+        copyToArray(srcAddr, dst->AsLongArray(), dst_offset, sz);
+    } else {
+        ThrowIllegalAccessException("not a primitive array");
+    }
+}
+
+static void Unsafe_copyMemoryFromPrimitiveArray(JNIEnv *env,
+                                                jobject unsafe ATTRIBUTE_UNUSED,
+                                                jobject srcObj,
+                                                jlong srcOffset,
+                                                jlong dstAddr,
+                                                jlong size) {
+    ScopedObjectAccess soa(env);
+    if (size == 0) {
+        return;
+    }
+    // size is nonnegative and fits into size_t
+    if (size < 0 || size != (jlong)(size_t) size) {
+        ThrowIllegalAccessException("wrong number of bytes");
+        // Bail out instead of copying with a bogus size.
+        return;
+    }
+    size_t sz = (size_t)size;
+    size_t src_offset = (size_t)srcOffset;
+    mirror::Object* src = soa.Decode<mirror::Object*>(srcObj);
+    mirror::Class* component_type = src->GetClass()->GetComponentType();
+    if (component_type->IsPrimitiveByte() || component_type->IsPrimitiveBoolean()) {
+        copyFromArray(dstAddr, src->AsByteSizedArray(), src_offset, sz);
+    } else if (component_type->IsPrimitiveShort() || component_type->IsPrimitiveChar()) {
+        copyFromArray(dstAddr, src->AsShortSizedArray(), src_offset, sz);
+    } else if (component_type->IsPrimitiveInt() || component_type->IsPrimitiveFloat()) {
+        copyFromArray(dstAddr, src->AsIntArray(), src_offset, sz);
+    } else if (component_type->IsPrimitiveLong() || component_type->IsPrimitiveDouble()) {
+        copyFromArray(dstAddr, src->AsLongArray(), src_offset, sz);
+    } else {
+        ThrowIllegalAccessException("not a primitive array");
+    }
+}
+
+static jboolean Unsafe_getBoolean(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+    ScopedFastNativeObjectAccess soa(env);
+    mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+    return obj->GetFieldBoolean(MemberOffset(offset));
+}
+
+static void Unsafe_putBoolean(JNIEnv* env, jobject, jobject javaObj, jlong offset, jboolean newValue) {
+    ScopedFastNativeObjectAccess soa(env);
+    mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+    // JNI must use non transactional mode (SetField8 is non-transactional).
+    obj->SetFieldBoolean<false>(MemberOffset(offset), newValue);
+}
+
+static jbyte Unsafe_getByte(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+    ScopedFastNativeObjectAccess soa(env);
+    mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+    return obj->GetFieldByte(MemberOffset(offset));
+}
+
+static void Unsafe_putByte(JNIEnv* env, jobject, jobject javaObj, jlong offset, jbyte newValue) {
+    ScopedFastNativeObjectAccess soa(env);
+    mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+    // JNI must use non transactional mode.
+    obj->SetFieldByte<false>(MemberOffset(offset), newValue);
+}
+
+static jchar Unsafe_getChar(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+    ScopedFastNativeObjectAccess soa(env);
+    mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+    return obj->GetFieldChar(MemberOffset(offset));
+}
+
+static void Unsafe_putChar(JNIEnv* env, jobject, jobject javaObj, jlong offset, jchar newValue) {
+    ScopedFastNativeObjectAccess soa(env);
+    mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+    // JNI must use non transactional mode.
+    obj->SetFieldChar<false>(MemberOffset(offset), newValue);
+}
+
+static jshort Unsafe_getShort(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+    ScopedFastNativeObjectAccess soa(env);
+    mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+    return obj->GetFieldShort(MemberOffset(offset));
+}
+
+static void Unsafe_putShort(JNIEnv* env, jobject, jobject javaObj, jlong offset, jshort newValue) {
+    ScopedFastNativeObjectAccess soa(env);
+    mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+    // JNI must use non transactional mode.
+    obj->SetFieldShort<false>(MemberOffset(offset), newValue);
+}
+
+static jfloat Unsafe_getFloat(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  union {int32_t val; jfloat converted;} conv;
+  conv.val = obj->GetField32(MemberOffset(offset));
+  return conv.converted;
+}
+
+static void Unsafe_putFloat(JNIEnv* env, jobject, jobject javaObj, jlong offset, jfloat newValue) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  union {int32_t converted; jfloat val;} conv;
+  conv.val = newValue;
+  // JNI must use non transactional mode.
+  obj->SetField32<false>(MemberOffset(offset), conv.converted);
+}
+
+static jdouble Unsafe_getDouble(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  union {int64_t val; jdouble converted;} conv;
+  conv.val = obj->GetField64(MemberOffset(offset));
+  return conv.converted;
+}
+
+static void Unsafe_putDouble(JNIEnv* env, jobject, jobject javaObj, jlong offset, jdouble newValue) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  union {int64_t converted; jdouble val;} conv;
+  conv.val = newValue;
+  // JNI must use non transactional mode.
+  obj->SetField64<false>(MemberOffset(offset), conv.converted);
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(Unsafe, compareAndSwapInt, "!(Ljava/lang/Object;JII)Z"),
   NATIVE_METHOD(Unsafe, compareAndSwapLong, "!(Ljava/lang/Object;JJJ)Z"),
@@ -206,6 +494,40 @@
   NATIVE_METHOD(Unsafe, putOrderedObject, "!(Ljava/lang/Object;JLjava/lang/Object;)V"),
   NATIVE_METHOD(Unsafe, getArrayBaseOffsetForComponentType, "!(Ljava/lang/Class;)I"),
   NATIVE_METHOD(Unsafe, getArrayIndexScaleForComponentType, "!(Ljava/lang/Class;)I"),
+  NATIVE_METHOD(Unsafe, addressSize, "!()I"),
+  NATIVE_METHOD(Unsafe, pageSize, "!()I"),
+  NATIVE_METHOD(Unsafe, allocateMemory, "!(J)J"),
+  NATIVE_METHOD(Unsafe, freeMemory, "!(J)V"),
+  NATIVE_METHOD(Unsafe, setMemory, "!(JJB)V"),
+  NATIVE_METHOD(Unsafe, getByte$, "!(J)B"),
+  NATIVE_METHOD(Unsafe, putByte$, "!(JB)V"),
+  NATIVE_METHOD(Unsafe, getShort$, "!(J)S"),
+  NATIVE_METHOD(Unsafe, putShort$, "!(JS)V"),
+  NATIVE_METHOD(Unsafe, getChar$, "!(J)C"),
+  NATIVE_METHOD(Unsafe, putChar$, "!(JC)V"),
+  NATIVE_METHOD(Unsafe, getInt$, "!(J)I"),
+  NATIVE_METHOD(Unsafe, putInt$, "!(JI)V"),
+  NATIVE_METHOD(Unsafe, getLong$, "!(J)J"),
+  NATIVE_METHOD(Unsafe, putLong$, "!(JJ)V"),
+  NATIVE_METHOD(Unsafe, getFloat$, "!(J)F"),
+  NATIVE_METHOD(Unsafe, putFloat$, "!(JF)V"),
+  NATIVE_METHOD(Unsafe, getDouble$, "!(J)D"),
+  NATIVE_METHOD(Unsafe, putDouble$, "!(JD)V"),
+  NATIVE_METHOD(Unsafe, copyMemory, "!(JJJ)V"),
+  NATIVE_METHOD(Unsafe, copyMemoryToPrimitiveArray, "!(JLjava/lang/Object;JJ)V"),
+  NATIVE_METHOD(Unsafe, copyMemoryFromPrimitiveArray, "!(Ljava/lang/Object;JJJ)V"),
+  NATIVE_METHOD(Unsafe, getBoolean, "!(Ljava/lang/Object;J)Z"),
+  NATIVE_METHOD(Unsafe, getByte, "!(Ljava/lang/Object;J)B"),
+  NATIVE_METHOD(Unsafe, getChar, "!(Ljava/lang/Object;J)C"),
+  NATIVE_METHOD(Unsafe, getShort, "!(Ljava/lang/Object;J)S"),
+  NATIVE_METHOD(Unsafe, getFloat, "!(Ljava/lang/Object;J)F"),
+  NATIVE_METHOD(Unsafe, getDouble, "!(Ljava/lang/Object;J)D"),
+  NATIVE_METHOD(Unsafe, putBoolean, "!(Ljava/lang/Object;JZ)V"),
+  NATIVE_METHOD(Unsafe, putByte, "!(Ljava/lang/Object;JB)V"),
+  NATIVE_METHOD(Unsafe, putChar, "!(Ljava/lang/Object;JC)V"),
+  NATIVE_METHOD(Unsafe, putShort, "!(Ljava/lang/Object;JS)V"),
+  NATIVE_METHOD(Unsafe, putFloat, "!(Ljava/lang/Object;JF)V"),
+  NATIVE_METHOD(Unsafe, putDouble, "!(Ljava/lang/Object;JD)V"),
 };
 
 void register_sun_misc_Unsafe(JNIEnv* env) {
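
The $-suffixed methods peek and poke raw native memory addressed by jlong, complementing allocateMemory/freeMemory, and every size is range-checked against size_t before use. A round-trip sketch using only the functions defined above:

    static jlong RoundTripSketch(JNIEnv* env, jobject unsafe) {
      jlong addr = Unsafe_allocateMemory(env, unsafe, sizeof(jlong));
      if (addr == 0) {
        return 0;  // OutOfMemoryError is already pending.
      }
      Unsafe_putLong$(env, unsafe, addr, 42);
      jlong value = Unsafe_getLong$(env, unsafe, addr);  // reads back 42
      Unsafe_freeMemory(env, unsafe, addr);
      return value;
    }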
diff --git a/runtime/native_bridge_art_interface.cc b/runtime/native_bridge_art_interface.cc
index 46cc5aa..61a1085 100644
--- a/runtime/native_bridge_art_interface.cc
+++ b/runtime/native_bridge_art_interface.cc
@@ -45,10 +45,7 @@
   mirror::Class* c = soa.Decode<mirror::Class*>(clazz);
 
   uint32_t native_method_count = 0;
-  for (auto& m : c->GetDirectMethods(sizeof(void*))) {
-    native_method_count += m.IsNative() ? 1u : 0u;
-  }
-  for (auto& m : c->GetVirtualMethods(sizeof(void*))) {
+  for (auto& m : c->GetMethods(sizeof(void*))) {
     native_method_count += m.IsNative() ? 1u : 0u;
   }
   return native_method_count;
@@ -63,19 +60,7 @@
   mirror::Class* c = soa.Decode<mirror::Class*>(clazz);
 
   uint32_t count = 0;
-  for (auto& m : c->GetDirectMethods(sizeof(void*))) {
-    if (m.IsNative()) {
-      if (count < method_count) {
-        methods[count].name = m.GetName();
-        methods[count].signature = m.GetShorty();
-        methods[count].fnPtr = m.GetEntryPointFromJni();
-        count++;
-      } else {
-        LOG(WARNING) << "Output native method array too small. Skipping " << PrettyMethod(&m);
-      }
-    }
-  }
-  for (auto& m : c->GetVirtualMethods(sizeof(void*))) {
+  for (auto& m : c->GetMethods(sizeof(void*))) {
     if (m.IsNative()) {
       if (count < method_count) {
         methods[count].name = m.GetName();
diff --git a/runtime/noop_compiler_callbacks.h b/runtime/noop_compiler_callbacks.h
index 1cbf2bb..02081cb 100644
--- a/runtime/noop_compiler_callbacks.h
+++ b/runtime/noop_compiler_callbacks.h
@@ -26,8 +26,7 @@
   NoopCompilerCallbacks() : CompilerCallbacks(CompilerCallbacks::CallbackMode::kCompileApp) {}
   ~NoopCompilerCallbacks() {}
 
-  bool MethodVerified(verifier::MethodVerifier* verifier ATTRIBUTE_UNUSED) OVERRIDE {
-    return true;
+  void MethodVerified(verifier::MethodVerifier* verifier ATTRIBUTE_UNUSED) OVERRIDE {
   }
 
   void ClassRejected(ClassReference ref ATTRIBUTE_UNUSED) OVERRIDE {}
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 40aca0d..c787b9a 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -45,9 +45,7 @@
 
 OatHeader* OatHeader::Create(InstructionSet instruction_set,
                              const InstructionSetFeatures* instruction_set_features,
-                             const std::vector<const DexFile*>* dex_files,
-                             uint32_t image_file_location_oat_checksum,
-                             uint32_t image_file_location_oat_data_begin,
+                             uint32_t dex_file_count,
                              const SafeMap<std::string, std::string>* variable_data) {
   // Estimate size of optional data.
   size_t needed_size = ComputeOatHeaderSize(variable_data);
@@ -58,18 +56,29 @@
   // Create the OatHeader in-place.
   return new (memory) OatHeader(instruction_set,
                                 instruction_set_features,
-                                dex_files,
-                                image_file_location_oat_checksum,
-                                image_file_location_oat_data_begin,
+                                dex_file_count,
                                 variable_data);
 }
 
 OatHeader::OatHeader(InstructionSet instruction_set,
                      const InstructionSetFeatures* instruction_set_features,
-                     const std::vector<const DexFile*>* dex_files,
-                     uint32_t image_file_location_oat_checksum,
-                     uint32_t image_file_location_oat_data_begin,
-                     const SafeMap<std::string, std::string>* variable_data) {
+                     uint32_t dex_file_count,
+                     const SafeMap<std::string, std::string>* variable_data)
+    : adler32_checksum_(adler32(0L, Z_NULL, 0)),
+      instruction_set_(instruction_set),
+      instruction_set_features_bitmap_(instruction_set_features->AsBitmap()),
+      dex_file_count_(dex_file_count),
+      executable_offset_(0),
+      interpreter_to_interpreter_bridge_offset_(0),
+      interpreter_to_compiled_code_bridge_offset_(0),
+      jni_dlsym_lookup_offset_(0),
+      quick_generic_jni_trampoline_offset_(0),
+      quick_imt_conflict_trampoline_offset_(0),
+      quick_resolution_trampoline_offset_(0),
+      quick_to_interpreter_bridge_offset_(0),
+      image_patch_delta_(0),
+      image_file_location_oat_checksum_(0),
+      image_file_location_oat_data_begin_(0) {
   // Don't want asserts in header as they would be checked in each file that includes it. But the
   // fields are private, so we check inside a method.
   static_assert(sizeof(magic_) == sizeof(kOatMagic),
@@ -79,46 +88,11 @@
 
   memcpy(magic_, kOatMagic, sizeof(kOatMagic));
   memcpy(version_, kOatVersion, sizeof(kOatVersion));
-  executable_offset_ = 0;
-  image_patch_delta_ = 0;
-
-  adler32_checksum_ = adler32(0L, Z_NULL, 0);
 
   CHECK_NE(instruction_set, kNone);
-  instruction_set_ = instruction_set;
-  UpdateChecksum(&instruction_set_, sizeof(instruction_set_));
-
-  instruction_set_features_bitmap_ = instruction_set_features->AsBitmap();
-  UpdateChecksum(&instruction_set_features_bitmap_, sizeof(instruction_set_features_bitmap_));
-
-  dex_file_count_ = dex_files->size();
-  UpdateChecksum(&dex_file_count_, sizeof(dex_file_count_));
-
-  image_file_location_oat_checksum_ = image_file_location_oat_checksum;
-  UpdateChecksum(&image_file_location_oat_checksum_, sizeof(image_file_location_oat_checksum_));
-
-  CHECK_ALIGNED(image_file_location_oat_data_begin, kPageSize);
-  image_file_location_oat_data_begin_ = image_file_location_oat_data_begin;
-  UpdateChecksum(&image_file_location_oat_data_begin_, sizeof(image_file_location_oat_data_begin_));
 
   // Flatten the map. Will also update variable_size_data_size_.
   Flatten(variable_data);
-
-  // Update checksum for variable data size.
-  UpdateChecksum(&key_value_store_size_, sizeof(key_value_store_size_));
-
-  // Update for data, if existing.
-  if (key_value_store_size_ > 0U) {
-    UpdateChecksum(&key_value_store_, key_value_store_size_);
-  }
-
-  interpreter_to_interpreter_bridge_offset_ = 0;
-  interpreter_to_compiled_code_bridge_offset_ = 0;
-  jni_dlsym_lookup_offset_ = 0;
-  quick_generic_jni_trampoline_offset_ = 0;
-  quick_imt_conflict_trampoline_offset_ = 0;
-  quick_resolution_trampoline_offset_ = 0;
-  quick_to_interpreter_bridge_offset_ = 0;
 }
 
 bool OatHeader::IsValid() const {
@@ -175,6 +149,37 @@
   return adler32_checksum_;
 }
 
+void OatHeader::UpdateChecksumWithHeaderData() {
+  UpdateChecksum(&instruction_set_, sizeof(instruction_set_));
+  UpdateChecksum(&instruction_set_features_bitmap_, sizeof(instruction_set_features_bitmap_));
+  UpdateChecksum(&dex_file_count_, sizeof(dex_file_count_));
+  UpdateChecksum(&image_file_location_oat_checksum_, sizeof(image_file_location_oat_checksum_));
+  UpdateChecksum(&image_file_location_oat_data_begin_, sizeof(image_file_location_oat_data_begin_));
+
+  // Update checksum for variable data size.
+  UpdateChecksum(&key_value_store_size_, sizeof(key_value_store_size_));
+
+  // Update for data, if existing.
+  if (key_value_store_size_ > 0U) {
+    UpdateChecksum(&key_value_store_, key_value_store_size_);
+  }
+
+  UpdateChecksum(&executable_offset_, sizeof(executable_offset_));
+  UpdateChecksum(&interpreter_to_interpreter_bridge_offset_,
+                 sizeof(interpreter_to_interpreter_bridge_offset_));
+  UpdateChecksum(&interpreter_to_compiled_code_bridge_offset_,
+                 sizeof(interpreter_to_compiled_code_bridge_offset_));
+  UpdateChecksum(&jni_dlsym_lookup_offset_, sizeof(jni_dlsym_lookup_offset_));
+  UpdateChecksum(&quick_generic_jni_trampoline_offset_,
+                 sizeof(quick_generic_jni_trampoline_offset_));
+  UpdateChecksum(&quick_imt_conflict_trampoline_offset_,
+                 sizeof(quick_imt_conflict_trampoline_offset_));
+  UpdateChecksum(&quick_resolution_trampoline_offset_,
+                 sizeof(quick_resolution_trampoline_offset_));
+  UpdateChecksum(&quick_to_interpreter_bridge_offset_,
+                 sizeof(quick_to_interpreter_bridge_offset_));
+}
+
 void OatHeader::UpdateChecksum(const void* data, size_t length) {
   DCHECK(IsValid());
   const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data);
@@ -205,7 +210,6 @@
   DCHECK_EQ(executable_offset_, 0U);
 
   executable_offset_ = executable_offset;
-  UpdateChecksum(&executable_offset_, sizeof(executable_offset));
 }
 
 const void* OatHeader::GetInterpreterToInterpreterBridge() const {
@@ -225,7 +229,6 @@
   DCHECK_EQ(interpreter_to_interpreter_bridge_offset_, 0U) << offset;
 
   interpreter_to_interpreter_bridge_offset_ = offset;
-  UpdateChecksum(&interpreter_to_interpreter_bridge_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetInterpreterToCompiledCodeBridge() const {
@@ -244,7 +247,6 @@
   DCHECK_EQ(interpreter_to_compiled_code_bridge_offset_, 0U) << offset;
 
   interpreter_to_compiled_code_bridge_offset_ = offset;
-  UpdateChecksum(&interpreter_to_compiled_code_bridge_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetJniDlsymLookup() const {
@@ -263,7 +265,6 @@
   DCHECK_EQ(jni_dlsym_lookup_offset_, 0U) << offset;
 
   jni_dlsym_lookup_offset_ = offset;
-  UpdateChecksum(&jni_dlsym_lookup_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetQuickGenericJniTrampoline() const {
@@ -282,7 +283,6 @@
   DCHECK_EQ(quick_generic_jni_trampoline_offset_, 0U) << offset;
 
   quick_generic_jni_trampoline_offset_ = offset;
-  UpdateChecksum(&quick_generic_jni_trampoline_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetQuickImtConflictTrampoline() const {
@@ -301,7 +301,6 @@
   DCHECK_EQ(quick_imt_conflict_trampoline_offset_, 0U) << offset;
 
   quick_imt_conflict_trampoline_offset_ = offset;
-  UpdateChecksum(&quick_imt_conflict_trampoline_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetQuickResolutionTrampoline() const {
@@ -320,7 +319,6 @@
   DCHECK_EQ(quick_resolution_trampoline_offset_, 0U) << offset;
 
   quick_resolution_trampoline_offset_ = offset;
-  UpdateChecksum(&quick_resolution_trampoline_offset_, sizeof(offset));
 }
 
 const void* OatHeader::GetQuickToInterpreterBridge() const {
@@ -339,7 +337,6 @@
   DCHECK_EQ(quick_to_interpreter_bridge_offset_, 0U) << offset;
 
   quick_to_interpreter_bridge_offset_ = offset;
-  UpdateChecksum(&quick_to_interpreter_bridge_offset_, sizeof(offset));
 }
 
 int32_t OatHeader::GetImagePatchDelta() const {
@@ -367,11 +364,22 @@
   return image_file_location_oat_checksum_;
 }
 
+void OatHeader::SetImageFileLocationOatChecksum(uint32_t image_file_location_oat_checksum) {
+  CHECK(IsValid());
+  image_file_location_oat_checksum_ = image_file_location_oat_checksum;
+}
+
 uint32_t OatHeader::GetImageFileLocationOatDataBegin() const {
   CHECK(IsValid());
   return image_file_location_oat_data_begin_;
 }
 
+void OatHeader::SetImageFileLocationOatDataBegin(uint32_t image_file_location_oat_data_begin) {
+  CHECK(IsValid());
+  CHECK_ALIGNED(image_file_location_oat_data_begin, kPageSize);
+  image_file_location_oat_data_begin_ = image_file_location_oat_data_begin;
+}
+
 uint32_t OatHeader::GetKeyValueStoreSize() const {
   CHECK(IsValid());
   return key_value_store_size_;
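
Moving the Adler-32 updates out of the individual setters into UpdateChecksumWithHeaderData() means the header can be mutated in any order and the checksum is folded in exactly once when the header is final. The assumed producer-side ordering after this change (the writer code is not part of this diff):

    static void FinalizeHeaderSketch(OatHeader* header,
                                     uint32_t image_checksum,
                                     uint32_t image_data_begin,
                                     uint32_t executable_offset) {
      header->SetImageFileLocationOatChecksum(image_checksum);
      header->SetImageFileLocationOatDataBegin(image_data_begin);
      header->SetExecutableOffset(executable_offset);
      // Fold every header field into the checksum in one pass.
      header->UpdateChecksumWithHeaderData();
    }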
diff --git a/runtime/oat.h b/runtime/oat.h
index 5b780c3..5ed1977 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -31,7 +31,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '7', '3', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '7', '4', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
@@ -45,15 +45,14 @@
 
   static OatHeader* Create(InstructionSet instruction_set,
                            const InstructionSetFeatures* instruction_set_features,
-                           const std::vector<const DexFile*>* dex_files,
-                           uint32_t image_file_location_oat_checksum,
-                           uint32_t image_file_location_oat_data_begin,
+                           uint32_t dex_file_count,
                            const SafeMap<std::string, std::string>* variable_data);
 
   bool IsValid() const;
   std::string GetValidationErrorMessage() const;
   const char* GetMagic() const;
   uint32_t GetChecksum() const;
+  void UpdateChecksumWithHeaderData();
   void UpdateChecksum(const void* data, size_t length);
   uint32_t GetDexFileCount() const {
     DCHECK(IsValid());
@@ -92,8 +91,11 @@
 
   InstructionSet GetInstructionSet() const;
   uint32_t GetInstructionSetFeaturesBitmap() const;
+
   uint32_t GetImageFileLocationOatChecksum() const;
+  void SetImageFileLocationOatChecksum(uint32_t image_file_location_oat_checksum);
   uint32_t GetImageFileLocationOatDataBegin() const;
+  void SetImageFileLocationOatDataBegin(uint32_t image_file_location_oat_data_begin);
 
   uint32_t GetKeyValueStoreSize() const;
   const uint8_t* GetKeyValueStore() const;
@@ -107,9 +109,7 @@
  private:
   OatHeader(InstructionSet instruction_set,
             const InstructionSetFeatures* instruction_set_features,
-            const std::vector<const DexFile*>* dex_files,
-            uint32_t image_file_location_oat_checksum,
-            uint32_t image_file_location_oat_data_begin,
+            uint32_t dex_file_count,
             const SafeMap<std::string, std::string>* variable_data);
 
   // Returns true if the value of the given key is "true", false otherwise.
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 680f4ac..83e594b 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -52,10 +52,10 @@
 
 namespace art {
 
-// Whether OatFile::Open will try DlOpen() first. Fallback is our own ELF loader.
+// Whether OatFile::Open will try dlopen. Fallback is our own ELF loader.
 static constexpr bool kUseDlopen = true;
 
-// Whether OatFile::Open will try DlOpen() on the host. On the host we're not linking against
+// Whether OatFile::Open will try dlopen on the host. On the host we're not linking against
 // bionic, so cannot take advantage of the support for changed semantics (loading the same soname
 // multiple times). However, if/when we switch the above, we likely want to switch this, too,
 // to get test coverage of the code paths.
@@ -64,348 +64,140 @@
 // For debugging, Open will print DlOpen error message if set to true.
 static constexpr bool kPrintDlOpenErrorMessage = false;
 
-std::string OatFile::ResolveRelativeEncodedDexLocation(
-      const char* abs_dex_location, const std::string& rel_dex_location) {
-  if (abs_dex_location != nullptr && rel_dex_location[0] != '/') {
-    // Strip :classes<N>.dex used for secondary multidex files.
-    std::string base = DexFile::GetBaseLocation(rel_dex_location);
-    std::string multidex_suffix = DexFile::GetMultiDexSuffix(rel_dex_location);
+// Note for OatFileBase and descendants:
+//
+// These are used in OatFile::Open to try all our loaders.
+//
+// The process is simple:
+//
+// 1) Allocate an instance through the standard constructor (location, executable)
+// 2) Load() to try to open the file.
+// 3) ComputeFields() to populate the OatFile fields like begin_, using FindDynamicSymbolAddress.
+// 4) PreSetup() for any steps that should be done before the final setup.
+// 5) Setup() to complete the procedure.
 
-    // Check if the base is a suffix of the provided abs_dex_location.
-    std::string target_suffix = "/" + base;
-    std::string abs_location(abs_dex_location);
-    if (abs_location.size() > target_suffix.size()) {
-      size_t pos = abs_location.size() - target_suffix.size();
-      if (abs_location.compare(pos, std::string::npos, target_suffix) == 0) {
-        return abs_location + multidex_suffix;
-      }
-    }
-  }
-  return rel_dex_location;
-}
+class OatFileBase : public OatFile {
+ public:
+  virtual ~OatFileBase() {}
 
-void OatFile::CheckLocation(const std::string& location) {
-  CHECK(!location.empty());
-}
-
-OatFile* OatFile::OpenWithElfFile(ElfFile* elf_file,
+  template <typename kOatFileBaseSubType>
+  static OatFileBase* OpenOatFile(const std::string& elf_filename,
                                   const std::string& location,
+                                  uint8_t* requested_base,
+                                  uint8_t* oat_file_begin,
+                                  bool writable,
+                                  bool executable,
                                   const char* abs_dex_location,
-                                  std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, false));
-  oat_file->elf_file_.reset(elf_file);
-  uint64_t offset, size;
-  bool has_section = elf_file->GetSectionOffsetAndSize(".rodata", &offset, &size);
-  CHECK(has_section);
-  oat_file->begin_ = elf_file->Begin() + offset;
-  oat_file->end_ = elf_file->Begin() + size + offset;
-  // Ignore the optional .bss section when opening non-executable.
-  return oat_file->Setup(abs_dex_location, error_msg) ? oat_file.release() : nullptr;
-}
+                                  std::string* error_msg);
 
-OatFile* OatFile::Open(const std::string& filename,
-                       const std::string& location,
-                       uint8_t* requested_base,
-                       uint8_t* oat_file_begin,
-                       bool executable,
-                       const char* abs_dex_location,
-                       std::string* error_msg) {
-  CHECK(!filename.empty()) << location;
-  CheckLocation(location);
-  std::unique_ptr<OatFile> ret;
+ protected:
+  OatFileBase(const std::string& filename, bool executable) : OatFile(filename, executable) {}
 
-  // Use dlopen only when flagged to do so, and when it's OK to load things executable.
-  // TODO: Also try when not executable? The issue here could be re-mapping as writable (as
-  //       !executable is a sign that we may want to patch), which may not be allowed for
-  //       various reasons.
-  // dlopen always returns the same library if it is already opened on the host. For this reason
-  // we only use dlopen if we are the target or we do not already have the dex file opened. Having
-  // the same library loaded multiple times at different addresses is required for class unloading
-  // and for having dex caches arrays in the .bss section.
-  Runtime* const runtime = Runtime::Current();
-  OatFileManager* const manager = (runtime != nullptr) ? &runtime->GetOatFileManager() : nullptr;
-  if (kUseDlopen && executable) {
-    bool success = kIsTargetBuild;
-    bool reserved_location = false;
-      // Manager may be null if we are running without a runtime.
-    if (!success && kUseDlopenOnHost && manager != nullptr) {
-      // RegisterOatFileLocation returns false if we are not the first caller to register that
-      // location.
-      reserved_location = manager->RegisterOatFileLocation(location);
-      success = reserved_location;
-    }
-    if (success) {
-      // Try to use dlopen. This may fail for various reasons, outlined below. We try dlopen, as
-      // this will register the oat file with the linker and allows libunwind to find our info.
-      ret.reset(OpenDlopen(filename, location, requested_base, abs_dex_location, error_msg));
-      if (reserved_location) {
-        manager->UnRegisterOatFileLocation(location);
-      }
-      if (ret != nullptr) {
-        return ret.release();
-      }
-      if (kPrintDlOpenErrorMessage) {
-        LOG(ERROR) << "Failed to dlopen: " << *error_msg;
-      }
-    }
+  virtual const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                                  std::string* error_msg) const = 0;
+
+  virtual bool Load(const std::string& elf_filename,
+                    uint8_t* oat_file_begin,
+                    bool writable,
+                    bool executable,
+                    std::string* error_msg) = 0;
+
+  bool ComputeFields(uint8_t* requested_base,
+                     const std::string& file_path,
+                     std::string* error_msg);
+
+  virtual void PreSetup(const std::string& elf_filename) = 0;
+
+  bool Setup(const char* abs_dex_location, std::string* error_msg);
+
+  // Setters exposed for ElfOatFile.
+
+  void SetBegin(const uint8_t* begin) {
+    begin_ = begin;
   }
 
-  // If we aren't trying to execute, we just use our own ElfFile loader for a couple reasons:
-  //
-  // On target, dlopen may fail when compiling due to selinux restrictions on installd.
-  //
-  // We use our own ELF loader for Quick to deal with legacy apps that
-  // open a generated dex file by name, remove the file, then open
-  // another generated dex file with the same name. http://b/10614658
-  //
-  // On host, dlopen is expected to fail when cross compiling, so fall back to OpenElfFile.
-  //
-  //
-  // Another independent reason is the absolute placement of boot.oat. dlopen on the host usually
-  // does honor the virtual address encoded in the ELF file only for ET_EXEC files, not ET_DYN.
-  std::unique_ptr<File> file(OS::OpenFileForReading(filename.c_str()));
-  if (file == nullptr) {
-    *error_msg = StringPrintf("Failed to open oat filename for reading: %s", strerror(errno));
+  void SetEnd(const uint8_t* end) {
+    end_ = end;
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(OatFileBase);
+};
+
+template <typename kOatFileBaseSubType>
+OatFileBase* OatFileBase::OpenOatFile(const std::string& elf_filename,
+                                      const std::string& location,
+                                      uint8_t* requested_base,
+                                      uint8_t* oat_file_begin,
+                                      bool writable,
+                                      bool executable,
+                                      const char* abs_dex_location,
+                                      std::string* error_msg) {
+  std::unique_ptr<OatFileBase> ret(new kOatFileBaseSubType(location, executable));
+  if (!ret->Load(elf_filename,
+                 oat_file_begin,
+                 writable,
+                 executable,
+                 error_msg)) {
     return nullptr;
   }
-  ret.reset(OpenElfFile(file.get(), location, requested_base, oat_file_begin, false, executable,
-                        abs_dex_location, error_msg));
 
-  // It would be nice to unlink here. But we might have opened the file created by the
-  // ScopedLock, which we better not delete to avoid races. TODO: Investigate how to fix the API
-  // to allow removal when we know the ELF must be borked.
+  if (!ret->ComputeFields(requested_base, elf_filename, error_msg)) {
+    return nullptr;
+  }
+
+  ret->PreSetup(elf_filename);
+
+  if (!ret->Setup(abs_dex_location, error_msg)) {
+    return nullptr;
+  }
+
   return ret.release();
 }
 
-OatFile* OatFile::OpenWritable(File* file, const std::string& location,
-                               const char* abs_dex_location,
-                               std::string* error_msg) {
-  CheckLocation(location);
-  return OpenElfFile(file, location, nullptr, nullptr, true, false, abs_dex_location, error_msg);
-}
-
-OatFile* OatFile::OpenReadable(File* file, const std::string& location,
-                               const char* abs_dex_location,
-                               std::string* error_msg) {
-  CheckLocation(location);
-  return OpenElfFile(file, location, nullptr, nullptr, false, false, abs_dex_location, error_msg);
-}
-
-OatFile* OatFile::OpenDlopen(const std::string& elf_filename,
-                             const std::string& location,
-                             uint8_t* requested_base,
-                             const char* abs_dex_location,
-                             std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, true));
-  bool success = oat_file->Dlopen(elf_filename, requested_base, abs_dex_location, error_msg);
-  if (!success) {
-    return nullptr;
-  }
-  return oat_file.release();
-}
-
-OatFile* OatFile::OpenElfFile(File* file,
-                              const std::string& location,
-                              uint8_t* requested_base,
-                              uint8_t* oat_file_begin,
-                              bool writable,
-                              bool executable,
-                              const char* abs_dex_location,
-                              std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, executable));
-  bool success = oat_file->ElfFileOpen(file, requested_base, oat_file_begin, writable, executable,
-                                       abs_dex_location, error_msg);
-  if (!success) {
-    CHECK(!error_msg->empty());
-    return nullptr;
-  }
-  return oat_file.release();
-}
-
-OatFile::OatFile(const std::string& location, bool is_executable)
-    : location_(location), begin_(nullptr), end_(nullptr), bss_begin_(nullptr), bss_end_(nullptr),
-      is_executable_(is_executable), dlopen_handle_(nullptr),
-      secondary_lookup_lock_("OatFile secondary lookup lock", kOatFileSecondaryLookupLock) {
-  CHECK(!location_.empty());
-  Runtime* const runtime = Runtime::Current();
-  if (runtime != nullptr && !runtime->IsAotCompiler()) {
-    runtime->GetOatFileManager().RegisterOatFileLocation(location);
-  }
-}
-
-OatFile::~OatFile() {
-  STLDeleteElements(&oat_dex_files_storage_);
-  if (dlopen_handle_ != nullptr) {
-    dlclose(dlopen_handle_);
-  }
-  Runtime* const runtime = Runtime::Current();
-  if (runtime != nullptr && !runtime->IsAotCompiler()) {
-    runtime->GetOatFileManager().UnRegisterOatFileLocation(location_);
-  }
-}
-
-bool OatFile::Dlopen(const std::string& elf_filename, uint8_t* requested_base,
-                     const char* abs_dex_location, std::string* error_msg) {
-#ifdef __APPLE__
-  // The dl_iterate_phdr syscall is missing.  There is similar API on OSX,
-  // but let's fallback to the custom loading code for the time being.
-  UNUSED(elf_filename, requested_base, abs_dex_location, error_msg);
-  return false;
-#else
-  {
-    UniqueCPtr<char> absolute_path(realpath(elf_filename.c_str(), nullptr));
-    if (absolute_path == nullptr) {
-      *error_msg = StringPrintf("Failed to find absolute path for '%s'", elf_filename.c_str());
-      return false;
-    }
-#ifdef __ANDROID__
-    android_dlextinfo extinfo;
-    extinfo.flags = ANDROID_DLEXT_FORCE_LOAD | ANDROID_DLEXT_FORCE_FIXED_VADDR;
-    dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo);
-#else
-    dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
-#endif
-  }
-  if (dlopen_handle_ == nullptr) {
-    *error_msg = StringPrintf("Failed to dlopen '%s': %s", elf_filename.c_str(), dlerror());
-    return false;
-  }
-  begin_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatdata"));
+bool OatFileBase::ComputeFields(uint8_t* requested_base,
+                                const std::string& file_path,
+                                std::string* error_msg) {
+  std::string symbol_error_msg;
+  begin_ = FindDynamicSymbolAddress("oatdata", &symbol_error_msg);
   if (begin_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s': %s", elf_filename.c_str(),
-                              dlerror());
+    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s' %s",
+                              file_path.c_str(),
+                              symbol_error_msg.c_str());
     return false;
   }
   if (requested_base != nullptr && begin_ != requested_base) {
     PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
     *error_msg = StringPrintf("Failed to find oatdata symbol at expected address: "
-                              "oatdata=%p != expected=%p, %s. See process maps in the log.",
-                              begin_, requested_base, elf_filename.c_str());
+        "oatdata=%p != expected=%p. See process maps in the log.",
+        begin_, requested_base);
     return false;
   }
-  end_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatlastword"));
+  end_ = FindDynamicSymbolAddress("oatlastword", &symbol_error_msg);
   if (end_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s': %s", elf_filename.c_str(),
-                              dlerror());
+    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s' %s",
+                              file_path.c_str(),
+                              symbol_error_msg.c_str());
     return false;
   }
   // Readjust to be non-inclusive upper bound.
   end_ += sizeof(uint32_t);
 
-  bss_begin_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatbss"));
+  bss_begin_ = const_cast<uint8_t*>(FindDynamicSymbolAddress("oatbss", &symbol_error_msg));
   if (bss_begin_ == nullptr) {
-    // No .bss section. Clear dlerror().
+    // No .bss section.
     bss_end_ = nullptr;
-    dlerror();
   } else {
-    bss_end_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatbsslastword"));
+    bss_end_ = const_cast<uint8_t*>(FindDynamicSymbolAddress("oatbsslastword", &symbol_error_msg));
     if (bss_end_ == nullptr) {
-      *error_msg = StringPrintf("Failed to find oatbasslastword symbol in '%s'",
-                                elf_filename.c_str());
+      *error_msg = StringPrintf("Failed to find oatbasslastword symbol in '%s'", file_path.c_str());
       return false;
     }
     // Readjust to be non-inclusive upper bound.
     bss_end_ += sizeof(uint32_t);
   }
 
-  // Ask the linker where it mmaped the file and notify our mmap wrapper of the regions.
-  struct dl_iterate_context {
-    static int callback(struct dl_phdr_info *info, size_t /* size */, void *data) {
-      auto* context = reinterpret_cast<dl_iterate_context*>(data);
-      // See whether this callback corresponds to the file which we have just loaded.
-      bool contains_begin = false;
-      for (int i = 0; i < info->dlpi_phnum; i++) {
-        if (info->dlpi_phdr[i].p_type == PT_LOAD) {
-          uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
-                                                      info->dlpi_phdr[i].p_vaddr);
-          size_t memsz = info->dlpi_phdr[i].p_memsz;
-          if (vaddr <= context->begin_ && context->begin_ < vaddr + memsz) {
-            contains_begin = true;
-            break;
-          }
-        }
-      }
-      // Add dummy mmaps for this file.
-      if (contains_begin) {
-        for (int i = 0; i < info->dlpi_phnum; i++) {
-          if (info->dlpi_phdr[i].p_type == PT_LOAD) {
-            uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
-                                                        info->dlpi_phdr[i].p_vaddr);
-            size_t memsz = info->dlpi_phdr[i].p_memsz;
-            MemMap* mmap = MemMap::MapDummy(info->dlpi_name, vaddr, memsz);
-            context->dlopen_mmaps_->push_back(std::unique_ptr<MemMap>(mmap));
-          }
-        }
-        return 1;  // Stop iteration and return 1 from dl_iterate_phdr.
-      }
-      return 0;  // Continue iteration and return 0 from dl_iterate_phdr when finished.
-    }
-    const uint8_t* const begin_;
-    std::vector<std::unique_ptr<MemMap>>* const dlopen_mmaps_;
-  } context = { begin_, &dlopen_mmaps_ };
-
-  if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but can not find its mmaps.";
-  }
-
-  return Setup(abs_dex_location, error_msg);
-#endif  // __APPLE__
-}
-
-bool OatFile::ElfFileOpen(File* file, uint8_t* requested_base, uint8_t* oat_file_begin,
-                          bool writable, bool executable,
-                          const char* abs_dex_location,
-                          std::string* error_msg) {
-  // TODO: rename requested_base to oat_data_begin
-  elf_file_.reset(ElfFile::Open(file, writable, /*program_header_only*/true, error_msg,
-                                oat_file_begin));
-  if (elf_file_ == nullptr) {
-    DCHECK(!error_msg->empty());
-    return false;
-  }
-  bool loaded = elf_file_->Load(executable, error_msg);
-  if (!loaded) {
-    DCHECK(!error_msg->empty());
-    return false;
-  }
-  begin_ = elf_file_->FindDynamicSymbolAddress("oatdata");
-  if (begin_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s'", file->GetPath().c_str());
-    return false;
-  }
-  if (requested_base != nullptr && begin_ != requested_base) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    *error_msg = StringPrintf("Failed to find oatdata symbol at expected address: "
-                              "oatdata=%p != expected=%p. See process maps in the log.",
-                              begin_, requested_base);
-    return false;
-  }
-  end_ = elf_file_->FindDynamicSymbolAddress("oatlastword");
-  if (end_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s'", file->GetPath().c_str());
-    return false;
-  }
-  // Readjust to be non-inclusive upper bound.
-  end_ += sizeof(uint32_t);
-
-  bss_begin_ = const_cast<uint8_t*>(elf_file_->FindDynamicSymbolAddress("oatbss"));
-  if (bss_begin_ == nullptr) {
-    // No .bss section. Clear dlerror().
-    bss_end_ = nullptr;
-    dlerror();
-  } else {
-    bss_end_ = const_cast<uint8_t*>(elf_file_->FindDynamicSymbolAddress("oatbsslastword"));
-    if (bss_end_ == nullptr) {
-      *error_msg = StringPrintf("Failed to find oatbasslastword symbol in '%s'",
-                                file->GetPath().c_str());
-      return false;
-    }
-    // Readjust to be non-inclusive upper bound.
-    bss_end_ += sizeof(uint32_t);
-  }
-
-  return Setup(abs_dex_location, error_msg);
+  return true;
 }
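
For intuition, the non-inclusive upper bound computed above works out as follows (addresses hypothetical):

    oatdata       -> begin_ = 0x70000000
    oatlastword   -> 0x7003fffc (labels the final 32-bit word)
    end_          =  0x7003fffc + sizeof(uint32_t) = 0x70040000
    oat data size =  end_ - begin_ = 0x40000 bytes (256 KiB)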
 
 // Read an unaligned entry from the OatDexFile data in OatFile and advance the read
@@ -428,7 +220,7 @@
   return true;
 }
 
-bool OatFile::Setup(const char* abs_dex_location, std::string* error_msg) {
+bool OatFileBase::Setup(const char* abs_dex_location, std::string* error_msg) {
   if (!GetOatHeader().IsValid()) {
     std::string cause = GetOatHeader().GetValidationErrorMessage();
     *error_msg = StringPrintf("Invalid oat header for '%s': %s",
@@ -630,6 +422,486 @@
   return true;
 }
 
+////////////////////////
+// OatFile via dlopen //
+////////////////////////
+
+static bool RegisterOatFileLocation(const std::string& location) {
+  if (!kIsTargetBuild) {
+    Runtime* const runtime = Runtime::Current();
+    if (runtime != nullptr && !runtime->IsAotCompiler()) {
+      return runtime->GetOatFileManager().RegisterOatFileLocation(location);
+    }
+    return false;
+  }
+  return true;
+}
+
+static void UnregisterOatFileLocation(const std::string& location) {
+  if (!kIsTargetBuild) {
+    Runtime* const runtime = Runtime::Current();
+    if (runtime != nullptr && !runtime->IsAotCompiler()) {
+      runtime->GetOatFileManager().UnRegisterOatFileLocation(location);
+    }
+  }
+}
+
+class DlOpenOatFile FINAL : public OatFileBase {
+ public:
+  DlOpenOatFile(const std::string& filename, bool executable)
+      : OatFileBase(filename, executable),
+        dlopen_handle_(nullptr),
+        first_oat_(RegisterOatFileLocation(filename)) {
+  }
+
+  ~DlOpenOatFile() {
+    if (dlopen_handle_ != nullptr) {
+      dlclose(dlopen_handle_);
+    }
+    UnregisterOatFileLocation(GetLocation());
+  }
+
+ protected:
+  const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                          std::string* error_msg) const OVERRIDE {
+    const uint8_t* ptr =
+        reinterpret_cast<const uint8_t*>(dlsym(dlopen_handle_, symbol_name.c_str()));
+    if (ptr == nullptr) {
+      *error_msg = dlerror();
+    }
+    return ptr;
+  }
+
+  bool Load(const std::string& elf_filename,
+            uint8_t* oat_file_begin,
+            bool writable,
+            bool executable,
+            std::string* error_msg) OVERRIDE;
+
+  // Ask the linker where it mmaped the file and notify our mmap wrapper of the regions.
+  void PreSetup(const std::string& elf_filename) OVERRIDE;
+
+ private:
+  bool Dlopen(const std::string& elf_filename,
+              uint8_t* oat_file_begin,
+              std::string* error_msg);
+
+  // dlopen handle during runtime.
+  void* dlopen_handle_;  // TODO: std::unique_ptr with a custom deleter.
+
+  // Dummy memory map objects corresponding to the regions mapped by dlopen.
+  std::vector<std::unique_ptr<MemMap>> dlopen_mmaps_;
+
+  // Track the registration status (= was this the first oat file) for the location.
+  const bool first_oat_;
+
+  DISALLOW_COPY_AND_ASSIGN(DlOpenOatFile);
+};
+
+bool DlOpenOatFile::Load(const std::string& elf_filename,
+                         uint8_t* oat_file_begin,
+                         bool writable,
+                         bool executable,
+                         std::string* error_msg) {
+  // Use dlopen only when flagged to do so, and when it's OK to load things executable.
+  // TODO: Also try when not executable? The issue here could be re-mapping as writable (as
+  //       !executable is a sign that we may want to patch), which may not be allowed for
+  //       various reasons.
+  if (!kUseDlopen) {
+    *error_msg = "DlOpen is disabled.";
+    return false;
+  }
+  if (writable) {
+    *error_msg = "DlOpen does not support writable loading.";
+    return false;
+  }
+  if (!executable) {
+    *error_msg = "DlOpen does not support non-executable loading.";
+    return false;
+  }
+
+  // On the host, dlopen always returns the same handle if the library is already open. For this
+  // reason we only use dlopen on the target, or on the host when the oat file is not already
+  // open. Having the same library loaded multiple times at different addresses is required for
+  // class unloading and for the dex cache arrays in the .bss section.
+  if (!kIsTargetBuild) {
+    if (!kUseDlopenOnHost) {
+      *error_msg = "DlOpen disabled for host.";
+      return false;
+    }
+    // For RAII, tracking multiple loads is done in the constructor and destructor. The result is
+    // stored in the first_oat_ flag.
+    if (!first_oat_) {
+      *error_msg = "Loading oat files multiple times with dlopen not supported on host.";
+      return false;
+    }
+  }
+
+  bool success = Dlopen(elf_filename, oat_file_begin, error_msg);
+  DCHECK(dlopen_handle_ != nullptr || !success);
+
+  return success;
+}
+
+bool DlOpenOatFile::Dlopen(const std::string& elf_filename,
+                           uint8_t* oat_file_begin,
+                           std::string* error_msg) {
+#ifdef __APPLE__
+  // The dl_iterate_phdr function is missing. There is a similar API on OS X,
+  // but let's fall back to the custom loading code for the time being.
+  UNUSED(elf_filename, oat_file_begin);
+  *error_msg = "Dlopen unsupported on Mac.";
+  return false;
+#else
+  {
+    UniqueCPtr<char> absolute_path(realpath(elf_filename.c_str(), nullptr));
+    if (absolute_path == nullptr) {
+      *error_msg = StringPrintf("Failed to find absolute path for '%s'", elf_filename.c_str());
+      return false;
+    }
+#ifdef __ANDROID__
+    android_dlextinfo extinfo;
+    extinfo.flags = ANDROID_DLEXT_FORCE_LOAD |        // Force-load, don't reuse the handle
+                                                      //   (open oat files multiple times).
+                    ANDROID_DLEXT_FORCE_FIXED_VADDR;  // Take a non-zero vaddr as absolute
+                                                      //   (non-pic boot image).
+    if (oat_file_begin != nullptr) {
+      // Use the requested address if the ELF vaddr is 0 (pic boot image).
+      extinfo.flags |= ANDROID_DLEXT_LOAD_AT_FIXED_ADDRESS;
+      extinfo.reserved_addr = oat_file_begin;
+    }
+    dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo);
+#else
+    dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
+    UNUSED(oat_file_begin);
+#endif
+  }
+  if (dlopen_handle_ == nullptr) {
+    *error_msg = StringPrintf("Failed to dlopen '%s': %s", elf_filename.c_str(), dlerror());
+    return false;
+  }
+  return true;
+#endif
+}
+
+void DlOpenOatFile::PreSetup(const std::string& elf_filename) {
+#ifdef __APPLE__
+  UNUSED(elf_filename);
+  LOG(FATAL) << "Should not reach here.";
+  UNREACHABLE();
+#else
+  struct dl_iterate_context {
+    static int callback(struct dl_phdr_info *info, size_t /* size */, void *data) {
+      auto* context = reinterpret_cast<dl_iterate_context*>(data);
+      // See whether this callback corresponds to the file which we have just loaded.
+      bool contains_begin = false;
+      for (int i = 0; i < info->dlpi_phnum; i++) {
+        if (info->dlpi_phdr[i].p_type == PT_LOAD) {
+          uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
+              info->dlpi_phdr[i].p_vaddr);
+          size_t memsz = info->dlpi_phdr[i].p_memsz;
+          if (vaddr <= context->begin_ && context->begin_ < vaddr + memsz) {
+            contains_begin = true;
+            break;
+          }
+        }
+      }
+      // Add dummy mmaps for this file.
+      if (contains_begin) {
+        for (int i = 0; i < info->dlpi_phnum; i++) {
+          if (info->dlpi_phdr[i].p_type == PT_LOAD) {
+            uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
+                info->dlpi_phdr[i].p_vaddr);
+            size_t memsz = info->dlpi_phdr[i].p_memsz;
+            MemMap* mmap = MemMap::MapDummy(info->dlpi_name, vaddr, memsz);
+            context->dlopen_mmaps_->push_back(std::unique_ptr<MemMap>(mmap));
+          }
+        }
+        return 1;  // Stop iteration and return 1 from dl_iterate_phdr.
+      }
+      return 0;  // Continue iteration and return 0 from dl_iterate_phdr when finished.
+    }
+    const uint8_t* const begin_;
+    std::vector<std::unique_ptr<MemMap>>* const dlopen_mmaps_;
+  } context = { Begin(), &dlopen_mmaps_ };
+
+  if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
+    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but can not find its mmaps.";
+  }
+#endif
+}
+
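
For reference, the dl_iterate_phdr pattern used by PreSetup can be exercised on its own. A minimal sketch, assuming a Linux or Bionic toolchain with <link.h>; it prints every PT_LOAD segment of every loaded object instead of recording dummy MemMaps:

    #include <link.h>
    #include <cstdio>

    // Called once per loaded ELF object; returning 0 continues the walk.
    static int DumpLoadSegments(struct dl_phdr_info* info, size_t /* size */, void* /* data */) {
      for (int i = 0; i < info->dlpi_phnum; i++) {
        if (info->dlpi_phdr[i].p_type == PT_LOAD) {
          unsigned long vaddr = info->dlpi_addr + info->dlpi_phdr[i].p_vaddr;
          printf("%s: [0x%lx, 0x%lx)\n", info->dlpi_name, vaddr,
                 vaddr + info->dlpi_phdr[i].p_memsz);
        }
      }
      return 0;
    }

    int main() {
      dl_iterate_phdr(DumpLoadSegments, nullptr);
      return 0;
    }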
+////////////////////////////////////////////////
+// OatFile via our own ElfFile implementation //
+////////////////////////////////////////////////
+
+class ElfOatFile FINAL : public OatFileBase {
+ public:
+  ElfOatFile(const std::string& filename, bool executable) : OatFileBase(filename, executable) {}
+
+  static ElfOatFile* OpenElfFile(File* file,
+                                 const std::string& location,
+                                 uint8_t* requested_base,
+                                 uint8_t* oat_file_begin,  // Override base if not null
+                                 bool writable,
+                                 bool executable,
+                                 const char* abs_dex_location,
+                                 std::string* error_msg);
+
+  bool InitializeFromElfFile(ElfFile* elf_file,
+                             const char* abs_dex_location,
+                             std::string* error_msg);
+
+ protected:
+  const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                          std::string* error_msg) const OVERRIDE {
+    const uint8_t* ptr = elf_file_->FindDynamicSymbolAddress(symbol_name);
+    if (ptr == nullptr) {
+      *error_msg = "(Internal implementation could not find symbol)";
+    }
+    return ptr;
+  }
+
+  bool Load(const std::string& elf_filename,
+            uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+            bool writable,
+            bool executable,
+            std::string* error_msg) OVERRIDE;
+
+  void PreSetup(const std::string& elf_filename ATTRIBUTE_UNUSED) OVERRIDE {
+  }
+
+ private:
+  bool ElfFileOpen(File* file,
+                   uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+                   bool writable,
+                   bool executable,
+                   std::string* error_msg);
+
+ private:
+  // Backing ElfFile for the oat file during cross compilation.
+  std::unique_ptr<ElfFile> elf_file_;
+
+  DISALLOW_COPY_AND_ASSIGN(ElfOatFile);
+};
+
+ElfOatFile* ElfOatFile::OpenElfFile(File* file,
+                                    const std::string& location,
+                                    uint8_t* requested_base,
+                                    uint8_t* oat_file_begin,  // Override base if not null
+                                    bool writable,
+                                    bool executable,
+                                    const char* abs_dex_location,
+                                    std::string* error_msg) {
+  std::unique_ptr<ElfOatFile> oat_file(new ElfOatFile(location, executable));
+  bool success = oat_file->ElfFileOpen(file, oat_file_begin, writable, executable, error_msg);
+  if (!success) {
+    CHECK(!error_msg->empty());
+    return nullptr;
+  }
+
+  // Complete the setup.
+  if (!oat_file->ComputeFields(requested_base, file->GetPath(), error_msg)) {
+    return nullptr;
+  }
+
+  if (!oat_file->Setup(abs_dex_location, error_msg)) {
+    return nullptr;
+  }
+
+  return oat_file.release();
+}
+
+bool ElfOatFile::InitializeFromElfFile(ElfFile* elf_file,
+                                       const char* abs_dex_location,
+                                       std::string* error_msg) {
+  if (IsExecutable()) {
+    *error_msg = "Cannot initialize from elf file in executable mode.";
+    return false;
+  }
+  elf_file_.reset(elf_file);
+  uint64_t offset, size;
+  bool has_section = elf_file->GetSectionOffsetAndSize(".rodata", &offset, &size);
+  CHECK(has_section);
+  SetBegin(elf_file->Begin() + offset);
+  SetEnd(elf_file->Begin() + size + offset);
+  // Ignore the optional .bss section when opening non-executable.
+  return Setup(abs_dex_location, error_msg);
+}
+
+bool ElfOatFile::Load(const std::string& elf_filename,
+                      uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+                      bool writable,
+                      bool executable,
+                      std::string* error_msg) {
+  std::unique_ptr<File> file(OS::OpenFileForReading(elf_filename.c_str()));
+  if (file == nullptr) {
+    *error_msg = StringPrintf("Failed to open oat filename for reading: %s", strerror(errno));
+    return false;
+  }
+  return ElfOatFile::ElfFileOpen(file.get(),
+                                 oat_file_begin,
+                                 writable,
+                                 executable,
+                                 error_msg);
+}
+
+bool ElfOatFile::ElfFileOpen(File* file,
+                             uint8_t* oat_file_begin,
+                             bool writable,
+                             bool executable,
+                             std::string* error_msg) {
+  // TODO: rename requested_base to oat_data_begin
+  elf_file_.reset(ElfFile::Open(file,
+                                writable,
+                                /*program_header_only*/true,
+                                error_msg,
+                                oat_file_begin));
+  if (elf_file_ == nullptr) {
+    DCHECK(!error_msg->empty());
+    return false;
+  }
+  bool loaded = elf_file_->Load(executable, error_msg);
+  DCHECK(loaded || !error_msg->empty());
+  return loaded;
+}
+
+//////////////////////////
+// General OatFile code //
+//////////////////////////
+
+std::string OatFile::ResolveRelativeEncodedDexLocation(
+      const char* abs_dex_location, const std::string& rel_dex_location) {
+  if (abs_dex_location != nullptr && rel_dex_location[0] != '/') {
+    // Strip :classes<N>.dex used for secondary multidex files.
+    std::string base = DexFile::GetBaseLocation(rel_dex_location);
+    std::string multidex_suffix = DexFile::GetMultiDexSuffix(rel_dex_location);
+
+    // Check if the base is a suffix of the provided abs_dex_location.
+    std::string target_suffix = "/" + base;
+    std::string abs_location(abs_dex_location);
+    if (abs_location.size() > target_suffix.size()) {
+      size_t pos = abs_location.size() - target_suffix.size();
+      if (abs_location.compare(pos, std::string::npos, target_suffix) == 0) {
+        return abs_location + multidex_suffix;
+      }
+    }
+  }
+  return rel_dex_location;
+}
+
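
A quick illustration of the suffix rule above; the paths and the include are hypothetical, and this assumes ResolveRelativeEncodedDexLocation can be called outside a running runtime (it only does string manipulation):

    #include <cassert>
    #include <string>
    #include "oat_file.h"  // Hypothetical include path.

    int main() {
      // Matching base: the multidex suffix is re-attached to the absolute path.
      assert(art::OatFile::ResolveRelativeEncodedDexLocation(
                 "/data/app/com.example/base.apk", "base.apk:classes2.dex") ==
             "/data/app/com.example/base.apk:classes2.dex");
      // Non-matching base: the relative location is returned unchanged.
      assert(art::OatFile::ResolveRelativeEncodedDexLocation(
                 "/data/app/com.example/other.apk", "base.apk:classes2.dex") ==
             "base.apk:classes2.dex");
      return 0;
    }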
+static void CheckLocation(const std::string& location) {
+  CHECK(!location.empty());
+}
+
+OatFile* OatFile::OpenWithElfFile(ElfFile* elf_file,
+                                  const std::string& location,
+                                  const char* abs_dex_location,
+                                  std::string* error_msg) {
+  std::unique_ptr<ElfOatFile> oat_file(new ElfOatFile(location, false /* executable */));
+  return oat_file->InitializeFromElfFile(elf_file, abs_dex_location, error_msg)
+      ? oat_file.release()
+      : nullptr;
+}
+
+OatFile* OatFile::Open(const std::string& filename,
+                       const std::string& location,
+                       uint8_t* requested_base,
+                       uint8_t* oat_file_begin,
+                       bool executable,
+                       const char* abs_dex_location,
+                       std::string* error_msg) {
+  CHECK(!filename.empty()) << location;
+  CheckLocation(location);
+  std::unique_ptr<OatFile> ret;
+
+  // Try dlopen first, as it is required for native debuggability. This will fail fast if dlopen is
+  // disabled.
+  OatFile* with_dlopen = OatFileBase::OpenOatFile<DlOpenOatFile>(filename,
+                                                                 location,
+                                                                 requested_base,
+                                                                 oat_file_begin,
+                                                                 /*writable*/ false,
+                                                                 executable,
+                                                                 abs_dex_location,
+                                                                 error_msg);
+  if (with_dlopen != nullptr) {
+    return with_dlopen;
+  }
+  if (kPrintDlOpenErrorMessage) {
+    LOG(ERROR) << "Failed to dlopen: " << *error_msg;
+  }
+
+  // If we aren't trying to execute, we just use our own ElfFile loader for a couple of reasons:
+  //
+  // On target, dlopen may fail when compiling due to selinux restrictions on installd.
+  //
+  // We use our own ELF loader for Quick to deal with legacy apps that
+  // open a generated dex file by name, remove the file, then open
+  // another generated dex file with the same name. http://b/10614658
+  //
+  // On host, dlopen is expected to fail when cross compiling, so fall back to OpenElfFile.
+  //
+  // Another independent reason is the absolute placement of boot.oat. dlopen on the host usually
+  // honors the virtual address encoded in the ELF file only for ET_EXEC files, not ET_DYN.
+  OatFile* with_internal = OatFileBase::OpenOatFile<ElfOatFile>(filename,
+                                                                location,
+                                                                requested_base,
+                                                                oat_file_begin,
+                                                                /*writable*/ false,
+                                                                executable,
+                                                                abs_dex_location,
+                                                                error_msg);
+  return with_internal;
+}
+
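
A sketch of how a caller might drive the refactored Open() entry point; the file and location strings are illustrative only:

    std::string error_msg;
    std::unique_ptr<OatFile> oat_file(OatFile::Open(
        "/data/dalvik-cache/arm64/system@framework@boot.oat",  // Hypothetical filename.
        "/system/framework/boot.oat",                          // Hypothetical location.
        /*requested_base*/ nullptr,
        /*oat_file_begin*/ nullptr,
        /*executable*/ false,
        /*abs_dex_location*/ nullptr,
        &error_msg));
    if (oat_file == nullptr) {
      LOG(ERROR) << "Could not open oat file: " << error_msg;
    }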
+OatFile* OatFile::OpenWritable(File* file,
+                               const std::string& location,
+                               const char* abs_dex_location,
+                               std::string* error_msg) {
+  CheckLocation(location);
+  return ElfOatFile::OpenElfFile(file,
+                                 location,
+                                 /*requested_base*/ nullptr,
+                                 /*oat_file_begin*/ nullptr,
+                                 /*writable*/ true,
+                                 /*executable*/ false,
+                                 abs_dex_location,
+                                 error_msg);
+}
+
+OatFile* OatFile::OpenReadable(File* file,
+                               const std::string& location,
+                               const char* abs_dex_location,
+                               std::string* error_msg) {
+  CheckLocation(location);
+  return ElfOatFile::OpenElfFile(file,
+                                 location,
+                                 /*requested_base*/ nullptr,
+                                 /*oat_file_begin*/ nullptr,
+                                 /*writable*/ false,
+                                 /*executable*/ false,
+                                 abs_dex_location,
+                                 error_msg);
+}
+
+OatFile::OatFile(const std::string& location, bool is_executable)
+    : location_(location),
+      begin_(nullptr),
+      end_(nullptr),
+      bss_begin_(nullptr),
+      bss_end_(nullptr),
+      is_executable_(is_executable),
+      secondary_lookup_lock_("OatFile secondary lookup lock", kOatFileSecondaryLookupLock) {
+  CHECK(!location_.empty());
+}
+
+OatFile::~OatFile() {
+  STLDeleteElements(&oat_dex_files_storage_);
+}
+
 const OatHeader& OatFile::GetOatHeader() const {
   return *reinterpret_cast<const OatHeader*>(Begin());
 }
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 0a77654..dbd7541 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -40,7 +40,7 @@
 class OatHeader;
 class OatDexFile;
 
-class OatFile FINAL {
+class OatFile {
  public:
   typedef art::OatDexFile OatDexFile;
 
@@ -74,7 +74,7 @@
                                const char* abs_dex_location,
                                std::string* error_msg);
 
-  ~OatFile();
+  virtual ~OatFile();
 
   bool IsExecutable() const {
     return is_executable_;
@@ -85,12 +85,6 @@
   // Indicates whether the oat file was compiled with full debugging capability.
   bool IsDebuggable() const;
 
-  ElfFile* GetElfFile() const {
-    CHECK_NE(reinterpret_cast<uintptr_t>(elf_file_.get()), reinterpret_cast<uintptr_t>(nullptr))
-        << "Cannot get an elf file from " << GetLocation();
-    return elf_file_.get();
-  }
-
   const std::string& GetLocation() const {
     return location_;
   }
@@ -260,35 +254,10 @@
   static bool GetDexLocationsFromDependencies(const char* dex_dependencies,
                                               std::vector<std::string>* locations);
 
+ protected:
+  OatFile(const std::string& filename, bool executable);
+
  private:
-  static void CheckLocation(const std::string& location);
-
-  static OatFile* OpenDlopen(const std::string& elf_filename,
-                             const std::string& location,
-                             uint8_t* requested_base,
-                             const char* abs_dex_location,
-                             std::string* error_msg);
-
-  static OatFile* OpenElfFile(File* file,
-                              const std::string& location,
-                              uint8_t* requested_base,
-                              uint8_t* oat_file_begin,  // Override base if not null
-                              bool writable,
-                              bool executable,
-                              const char* abs_dex_location,
-                              std::string* error_msg);
-
-  explicit OatFile(const std::string& filename, bool executable);
-  bool Dlopen(const std::string& elf_filename, uint8_t* requested_base,
-              const char* abs_dex_location, std::string* error_msg);
-  bool ElfFileOpen(File* file, uint8_t* requested_base,
-                   uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
-                   bool writable, bool executable,
-                   const char* abs_dex_location,
-                   std::string* error_msg);
-
-  bool Setup(const char* abs_dex_location, std::string* error_msg);
-
   // The oat file name.
   //
   // The image will embed this to link its associated oat file.
@@ -309,18 +278,6 @@
   // Was this oat_file loaded executable?
   const bool is_executable_;
 
-  // Backing memory map for oat file during when opened by ElfWriter during initial compilation.
-  std::unique_ptr<MemMap> mem_map_;
-
-  // Backing memory map for oat file during cross compilation.
-  std::unique_ptr<ElfFile> elf_file_;
-
-  // dlopen handle during runtime.
-  void* dlopen_handle_;
-
-  // Dummy memory map objects corresponding to the regions mapped by dlopen.
-  std::vector<std::unique_ptr<MemMap>> dlopen_mmaps_;
-
   // Owning storage for the OatDexFile objects.
   std::vector<const OatDexFile*> oat_dex_files_storage_;
 
@@ -356,6 +313,7 @@
   friend class OatClass;
   friend class art::OatDexFile;
   friend class OatDumper;  // For GetBase and GetLimit
+  friend class OatFileBase;
   DISALLOW_COPY_AND_ASSIGN(OatFile);
 };
 
@@ -426,6 +384,7 @@
   uint8_t* const dex_cache_arrays_;
 
   friend class OatFile;
+  friend class OatFileBase;
   DISALLOW_COPY_AND_ASSIGN(OatDexFile);
 };
 
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 8c7efb2..40cd50b 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -1025,7 +1025,7 @@
 
   // We use the lib core dex file, because it's large, and hopefully should
   // take a while to generate.
-  Copy(GetLibCoreDexFileName(), dex_location);
+  Copy(GetLibCoreDexFileNames()[0], dex_location);
 
   const int kNumThreads = 32;
   Thread* self = Thread::Current();
diff --git a/runtime/openjdkjvm/NOTICE b/runtime/openjdkjvm/NOTICE
new file mode 100644
index 0000000..700a206
--- /dev/null
+++ b/runtime/openjdkjvm/NOTICE
@@ -0,0 +1,29 @@
+Copyright (C) 2014 The Android Open Source Project
+DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+This file implements interfaces from the file jvm.h. This implementation
+is licensed under the same terms as the file jvm.h.  The
+copyright and license information for the file jvm.h follows.
+
+Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License version 2 only, as
+published by the Free Software Foundation.  Oracle designates this
+particular file as subject to the "Classpath" exception as provided
+by Oracle in the LICENSE file that accompanied this code.
+
+This code is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+version 2 for more details (a copy is included in the LICENSE file that
+accompanied this code).
+
+You should have received a copy of the GNU General Public License version
+2 along with this work; if not, write to the Free Software Foundation,
+Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+or visit www.oracle.com if you need additional information or have any
+questions.
diff --git a/runtime/openjdkjvm/OpenjdkJvm.cc b/runtime/openjdkjvm/OpenjdkJvm.cc
new file mode 100644
index 0000000..ab0d934
--- /dev/null
+++ b/runtime/openjdkjvm/OpenjdkJvm.cc
@@ -0,0 +1,540 @@
+/* Copyright (C) 2014 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvm.h. This implementation
+ * is licensed under the same terms as the file jvm.h.  The
+ * copyright and license information for the file jvm.h follows.
+ *
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * Services that OpenJDK expects the VM to provide.
+ */
+#include <stdio.h>
+#include <dlfcn.h>
+#include <limits.h>
+#include <unistd.h>
+
+#include "common_throws.h"
+#include "gc/heap.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "runtime.h"
+#include "handle_scope-inl.h"
+#include "scoped_thread_state_change.h"
+#include "ScopedUtfChars.h"
+#include "mirror/class_loader.h"
+#include "verify_object-inl.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "../../libcore/ojluni/src/main/native/jvm.h"  // TODO(narayan): fix it
+#include "jni_internal.h"
+#include "mirror/string-inl.h"
+#include "native/scoped_fast_native_object_access.h"
+#include "ScopedLocalRef.h"
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+
+#ifdef __ANDROID__
+// This function is provided by android linker.
+extern "C" void android_update_LD_LIBRARY_PATH(const char* ld_library_path);
+#endif  // __ANDROID__
+
+#undef LOG_TAG
+#define LOG_TAG "artopenjdk"
+
+using art::DEBUG;
+using art::WARNING;
+using art::VERBOSE;
+using art::INFO;
+using art::ERROR;
+using art::FATAL;
+
+/* posix open() with extensions; used by e.g. ZipFile */
+JNIEXPORT jint JVM_Open(const char* fname, jint flags, jint mode) {
+    LOG(DEBUG) << "JVM_Open fname='" << fname << "', flags=" << flags << ", mode=" << mode;
+
+    /*
+     * The call is expected to handle JVM_O_DELETE, which causes the file
+     * to be removed after it is opened.  Also, some code seems to
+     * want the special return value JVM_EEXIST if the file open fails
+     * due to O_EXCL.
+     */
+    int fd = TEMP_FAILURE_RETRY(open(fname, flags & ~JVM_O_DELETE, mode));
+    if (fd < 0) {
+        int err = errno;
+        LOG(DEBUG) << "open(" << fname << ") failed: " << strerror(errno);
+        if (err == EEXIST) {
+            return JVM_EEXIST;
+        } else {
+            return -1;
+        }
+    }
+
+    if (flags & JVM_O_DELETE) {
+        LOG(DEBUG) << "Deleting '" << fname << "' after open\n";
+        if (unlink(fname) != 0) {
+            LOG(WARNING) << "Post-open deletion of '" << fname << "' failed: " << strerror(errno);
+        }
+        /* ignore */
+    }
+
+    LOG(VERBOSE) << "open(" << fname << ") --> " << fd;
+    return fd;
+}
+
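
A hypothetical caller of the JVM_O_DELETE path above: the name is unlinked as soon as the open succeeds, but the descriptor stays usable until JVM_Close:

    jint fd = JVM_Open("/tmp/scratch.zip", O_CREAT | O_RDWR | JVM_O_DELETE, 0600);
    if (fd >= 0) {
      // Read/write through fd as usual; the file name is already gone.
      JVM_Close(fd);
    } else if (fd == JVM_EEXIST) {
      // An O_EXCL collision is reported with the special JVM_EEXIST value.
    }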
+/* posix close() */
+JNIEXPORT jint JVM_Close(jint fd) {
+    LOG(DEBUG) << "JVM_Close fd=" << fd;
+    // don't want TEMP_FAILURE_RETRY here -- file is closed even if EINTR
+    return close(fd);
+}
+
+/* posix read() */
+JNIEXPORT jint JVM_Read(jint fd, char* buf, jint nbytes) {
+    LOG(DEBUG) << "JVM_Read fd=" << fd << ", buf='" << buf << "', nbytes=" << nbytes;
+    return TEMP_FAILURE_RETRY(read(fd, buf, nbytes));
+}
+
+/* posix write(); is used to write messages to stderr */
+JNIEXPORT jint JVM_Write(jint fd, char* buf, jint nbytes) {
+    LOG(DEBUG) << "JVM_Write fd=" << fd << ", buf='" << buf << "', nbytes=" << nbytes;
+    return TEMP_FAILURE_RETRY(write(fd, buf, nbytes));
+}
+
+/* posix lseek() */
+JNIEXPORT jlong JVM_Lseek(jint fd, jlong offset, jint whence) {
+    LOG(DEBUG) << "JVM_Lseek fd=" << fd << ", offset=" << offset << ", whence=" << whence;
+    return TEMP_FAILURE_RETRY(lseek(fd, offset, whence));
+}
+
+/*
+ * "raw monitors" seem to be expected to behave like non-recursive pthread
+ * mutexes.  They're used by ZipFile.
+ */
+JNIEXPORT void* JVM_RawMonitorCreate(void) {
+    LOG(DEBUG) << "JVM_RawMonitorCreate";
+    pthread_mutex_t* newMutex =
+        reinterpret_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t)));
+    pthread_mutex_init(newMutex, NULL);
+    return newMutex;
+}
+
+JNIEXPORT void JVM_RawMonitorDestroy(void* mon) {
+    LOG(DEBUG) << "JVM_RawMonitorDestroy mon=" << mon;
+    pthread_mutex_destroy(reinterpret_cast<pthread_mutex_t*>(mon));
+}
+
+JNIEXPORT jint JVM_RawMonitorEnter(void* mon) {
+    LOG(DEBUG) << "JVM_RawMonitorEnter mon=" << mon;
+    return pthread_mutex_lock(reinterpret_cast<pthread_mutex_t*>(mon));
+}
+
+JNIEXPORT void JVM_RawMonitorExit(void* mon) {
+    LOG(DEBUG) << "JVM_RawMonitorExit mon=" << mon;
+    pthread_mutex_unlock(reinterpret_cast<pthread_mutex_t*>(mon));
+}
+
+JNIEXPORT char* JVM_NativePath(char* path) {
+    LOG(DEBUG) << "JVM_NativePath path='" << path << "'";
+    return path;
+}
+
+JNIEXPORT jint JVM_GetLastErrorString(char* buf, int len) {
+#if defined(__GLIBC__) || defined(__BIONIC__)
+  int err = errno;    // grab before logging can trash it
+  LOG(DEBUG) << "JVM_GetLastErrorString buf=" << buf << ", len=" << len;
+
+  if (len == 0) {
+    return 0;
+  }
+
+  char* result = strerror_r(err, buf, len);
+  if (result != buf) {
+    strncpy(buf, result, len);
+    buf[len - 1] = '\0';
+  }
+
+  return strlen(buf);
+#else
+  UNUSED(buf);
+  UNUSED(len);
+  return -1;
+#endif
+}
+
+JNIEXPORT int jio_fprintf(FILE* fp, const char* fmt, ...) {
+    va_list args;
+
+    va_start(args, fmt);
+    int len = jio_vfprintf(fp, fmt, args);
+    va_end(args);
+
+    return len;
+}
+
+JNIEXPORT int jio_vfprintf(FILE* fp, const char* fmt, va_list args) {
+    assert(fp != NULL);
+    return vfprintf(fp, fmt, args);
+}
+
+/* posix fsync() */
+JNIEXPORT jint JVM_Sync(jint fd) {
+    LOG(DEBUG) << "JVM_Sync fd=" << fd;
+    return TEMP_FAILURE_RETRY(fsync(fd));
+}
+
+JNIEXPORT void* JVM_FindLibraryEntry(void* handle, const char* name) {
+    LOG(DEBUG) << "JVM_FindLibraryEntry handle=" << handle << " name=" << name;
+    return dlsym(handle, name);
+}
+
+JNIEXPORT jlong JVM_CurrentTimeMillis(JNIEnv* env, jclass clazz ATTRIBUTE_UNUSED) {
+    LOG(DEBUG) << "JVM_CurrentTimeMillis env=" << env;
+    struct timeval tv;
+
+    gettimeofday(&tv, (struct timezone *) NULL);
+    jlong when = tv.tv_sec * 1000LL + tv.tv_usec / 1000;
+    return when;
+}
+
+JNIEXPORT jint JVM_Socket(jint domain, jint type, jint protocol) {
+    LOG(DEBUG) << "JVM_Socket domain=" << domain << ", type=" << type << ", protocol=" << protocol;
+
+    return TEMP_FAILURE_RETRY(socket(domain, type, protocol));
+}
+
+JNIEXPORT jint JVM_InitializeSocketLibrary() {
+  return 0;
+}
+
+int jio_vsnprintf(char *str, size_t count, const char *fmt, va_list args) {
+  if ((intptr_t)count <= 0) return -1;  // Rejects count == 0 and counts that are negative as signed.
+  return vsnprintf(str, count, fmt, args);
+}
+
+int jio_snprintf(char *str, size_t count, const char *fmt, ...) {
+  va_list args;
+  int len;
+  va_start(args, fmt);
+  len = jio_vsnprintf(str, count, fmt, args);
+  va_end(args);
+  return len;
+}
+
+JNIEXPORT jint JVM_SetSockOpt(jint fd, int level, int optname,
+    const char* optval, int optlen) {
+  LOG(DEBUG) << "JVM_SetSockOpt fd=" << fd << ", level=" << level << ", optname=" << optname
+             << ", optval=" << optval << ", optlen=" << optlen;
+  return TEMP_FAILURE_RETRY(setsockopt(fd, level, optname, optval, optlen));
+}
+
+JNIEXPORT jint JVM_SocketShutdown(jint fd, jint howto) {
+  LOG(DEBUG) << "JVM_SocketShutdown fd=" << fd << ", howto=" << howto;
+  return TEMP_FAILURE_RETRY(shutdown(fd, howto));
+}
+
+JNIEXPORT jint JVM_GetSockOpt(jint fd, int level, int optname, char* optval,
+  int* optlen) {
+  LOG(DEBUG) << "JVM_GetSockOpt fd=" << fd << ", level=" << level << ", optname=" << optname
+             << ", optval=" << optval << ", optlen=" << optlen;
+
+  socklen_t len = *optlen;
+  int cc = TEMP_FAILURE_RETRY(getsockopt(fd, level, optname, optval, &len));
+  *optlen = len;
+  return cc;
+}
+
+JNIEXPORT jint JVM_GetSockName(jint fd, struct sockaddr* addr, int* addrlen) {
+  LOG(DEBUG) << "JVM_GetSockName fd=" << fd << ", addr=" << addr << ", addrlen=" << addrlen;
+
+  socklen_t len = *addrlen;
+  int cc = TEMP_FAILURE_RETRY(getsockname(fd, addr, &len));
+  *addrlen = len;
+  return cc;
+}
+
+JNIEXPORT jint JVM_SocketAvailable(jint fd, jint* result) {
+  LOG(DEBUG) << "JVM_SocketAvailable fd=" << fd << ", result=" << result;
+
+  if (TEMP_FAILURE_RETRY(ioctl(fd, FIONREAD, result)) < 0) {
+      LOG(DEBUG) << "ioctl(" << fd << ", FIONREAD) failed: " << strerror(errno);
+      return JNI_FALSE;
+  }
+
+  return JNI_TRUE;
+}
+
+JNIEXPORT jint JVM_Send(jint fd, char* buf, jint nBytes, jint flags) {
+  LOG(DEBUG) << "JVM_Send fd=" << fd << ", buf=" << buf << ", nBytes="
+             << nBytes << ", flags=" << flags;
+
+  return TEMP_FAILURE_RETRY(send(fd, buf, nBytes, flags));
+}
+
+JNIEXPORT jint JVM_SocketClose(jint fd) {
+  LOG(DEBUG) << "JVM_SocketClose fd=" << fd;
+
+  // don't want TEMP_FAILURE_RETRY here -- file is closed even if EINTR
+  return close(fd);
+}
+
+JNIEXPORT jint JVM_Listen(jint fd, jint count) {
+  LOG(DEBUG) << "JVM_Listen fd=" << fd << ", count=" << count;
+
+  return TEMP_FAILURE_RETRY(listen(fd, count));
+}
+
+JNIEXPORT jint JVM_Connect(jint fd, struct sockaddr* addr, jint addrlen) {
+  LOG(DEBUG) << "JVM_Connect fd=" << fd << ", addr=" << addr << ", addrlen=" << addrlen;
+
+  return TEMP_FAILURE_RETRY(connect(fd, addr, addrlen));
+}
+
+JNIEXPORT int JVM_GetHostName(char* name, int namelen) {
+  LOG(DEBUG) << "JVM_GetHostName name=" << name << ", namelen=" << namelen;
+
+  return TEMP_FAILURE_RETRY(gethostname(name, namelen));
+}
+
+JNIEXPORT jstring JVM_InternString(JNIEnv* env, jstring jstr) {
+  LOG(DEBUG) << "JVM_InternString env=" << env << ", jstr=" << jstr;
+  art::ScopedFastNativeObjectAccess soa(env);
+  art::mirror::String* s = soa.Decode<art::mirror::String*>(jstr);
+  art::mirror::String* result = s->Intern();
+  return soa.AddLocalReference<jstring>(result);
+}
+
+JNIEXPORT jlong JVM_FreeMemory(void) {
+  return art::Runtime::Current()->GetHeap()->GetFreeMemory();
+}
+
+JNIEXPORT jlong JVM_TotalMemory(void) {
+  return art::Runtime::Current()->GetHeap()->GetTotalMemory();
+}
+
+JNIEXPORT jlong JVM_MaxMemory(void) {
+  return art::Runtime::Current()->GetHeap()->GetMaxMemory();
+}
+
+JNIEXPORT void JVM_GC(void) {
+  if (art::Runtime::Current()->IsExplicitGcDisabled()) {
+      LOG(INFO) << "Explicit GC skipped.";
+      return;
+  }
+  art::Runtime::Current()->GetHeap()->CollectGarbage(false);
+}
+
+JNIEXPORT __attribute__((noreturn)) void JVM_Exit(jint status) {
+  LOG(INFO) << "System.exit called, status: " << status;
+  art::Runtime::Current()->CallExitHook(status);
+  exit(status);
+}
+
+static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPath) {
+#ifdef __ANDROID__
+  if (javaLdLibraryPath != nullptr) {
+    ScopedUtfChars ldLibraryPath(env, javaLdLibraryPath);
+    if (ldLibraryPath.c_str() != nullptr) {
+      android_update_LD_LIBRARY_PATH(ldLibraryPath.c_str());
+    }
+  }
+
+#else
+  LOG(WARNING) << "android_update_LD_LIBRARY_PATH not found; .so dependencies will not work!";
+  UNUSED(javaLdLibraryPath, env);
+#endif
+}
+
+
+JNIEXPORT jstring JVM_NativeLoad(JNIEnv* env, jstring javaFilename, jobject javaLoader,
+                                 jboolean isSharedNamespace, jstring javaLibrarySearchPath,
+                                 jstring javaLibraryPermittedPath) {
+  ScopedUtfChars filename(env, javaFilename);
+  if (filename.c_str() == NULL) {
+    return NULL;
+  }
+
+  int32_t target_sdk_version = art::Runtime::Current()->GetTargetSdkVersion();
+
+  // Starting with N, nativeLoad uses a classloader-local linker namespace
+  // instead of the global LD_LIBRARY_PATH (SDK 23 is Marshmallow).
+  if (target_sdk_version <= 23) {
+    SetLdLibraryPath(env, javaLibrarySearchPath);
+  }
+
+  std::string error_msg;
+  {
+    art::ScopedObjectAccess soa(env);
+    art::StackHandleScope<1> hs(soa.Self());
+    art::JavaVMExt* vm = art::Runtime::Current()->GetJavaVM();
+    bool success = vm->LoadNativeLibrary(env,
+                                         filename.c_str(),
+                                         javaLoader,
+                                         isSharedNamespace == JNI_TRUE,
+                                         javaLibrarySearchPath,
+                                         javaLibraryPermittedPath,
+                                         &error_msg);
+    if (success) {
+      return nullptr;
+    }
+  }
+
+  // Don't let a pending exception from JNI_OnLoad cause a CheckJNI issue with NewStringUTF.
+  env->ExceptionClear();
+  return env->NewStringUTF(error_msg.c_str());
+}
+
+JNIEXPORT void JVM_StartThread(JNIEnv* env, jobject jthread, jlong stack_size, jboolean daemon) {
+  art::Thread::CreateNativeThread(env, jthread, stack_size, daemon == JNI_TRUE);
+}
+
+JNIEXPORT void JVM_SetThreadPriority(JNIEnv* env, jobject jthread, jint prio) {
+  art::ScopedObjectAccess soa(env);
+  art::MutexLock mu(soa.Self(), *art::Locks::thread_list_lock_);
+  art::Thread* thread = art::Thread::FromManagedThread(soa, jthread);
+  if (thread != NULL) {
+    thread->SetNativePriority(prio);
+  }
+}
+
+JNIEXPORT void JVM_Yield(JNIEnv* env ATTRIBUTE_UNUSED, jclass threadClass ATTRIBUTE_UNUSED) {
+  sched_yield();
+}
+
+JNIEXPORT void JVM_Sleep(JNIEnv* env, jclass threadClass ATTRIBUTE_UNUSED,
+                         jobject java_lock, jlong millis) {
+  art::ScopedFastNativeObjectAccess soa(env);
+  art::mirror::Object* lock = soa.Decode<art::mirror::Object*>(java_lock);
+  art::Monitor::Wait(art::Thread::Current(), lock, millis, 0, true, art::kSleeping);
+}
+
+JNIEXPORT jobject JVM_CurrentThread(JNIEnv* env, jclass unused ATTRIBUTE_UNUSED) {
+  art::ScopedFastNativeObjectAccess soa(env);
+  return soa.AddLocalReference<jobject>(soa.Self()->GetPeer());
+}
+
+JNIEXPORT void JVM_Interrupt(JNIEnv* env, jobject jthread) {
+  art::ScopedFastNativeObjectAccess soa(env);
+  art::MutexLock mu(soa.Self(), *art::Locks::thread_list_lock_);
+  art::Thread* thread = art::Thread::FromManagedThread(soa, jthread);
+  if (thread != nullptr) {
+    thread->Interrupt(soa.Self());
+  }
+}
+
+JNIEXPORT jboolean JVM_IsInterrupted(JNIEnv* env, jobject jthread, jboolean clearInterrupted) {
+  if (clearInterrupted) {
+    return static_cast<art::JNIEnvExt*>(env)->self->Interrupted() ? JNI_TRUE : JNI_FALSE;
+  } else {
+    art::ScopedFastNativeObjectAccess soa(env);
+    art::MutexLock mu(soa.Self(), *art::Locks::thread_list_lock_);
+    art::Thread* thread = art::Thread::FromManagedThread(soa, jthread);
+    return (thread != nullptr) ? thread->IsInterrupted() : JNI_FALSE;
+  }
+}
+
+JNIEXPORT jboolean JVM_HoldsLock(JNIEnv* env, jclass unused ATTRIBUTE_UNUSED, jobject jobj) {
+  art::ScopedObjectAccess soa(env);
+  art::mirror::Object* object = soa.Decode<art::mirror::Object*>(jobj);
+  if (object == NULL) {
+    art::ThrowNullPointerException("object == null");
+    return JNI_FALSE;
+  }
+  return soa.Self()->HoldsLock(object);
+}
+
+JNIEXPORT void JVM_SetNativeThreadName(JNIEnv* env, jobject jthread, jstring java_name) {
+  ScopedUtfChars name(env, java_name);
+  {
+    art::ScopedObjectAccess soa(env);
+    if (soa.Decode<art::mirror::Object*>(jthread) == soa.Self()->GetPeer()) {
+      soa.Self()->SetThreadName(name.c_str());
+      return;
+    }
+  }
+  // Suspend the thread to prevent it from exiting while we set its name. We don't just hold the
+  // thread list lock for this, as setting the thread name locks and unlocks the mutator lock
+  // in the DDMS send code.
+  art::ThreadList* thread_list = art::Runtime::Current()->GetThreadList();
+  bool timed_out;
+  // Take suspend thread lock to avoid races with threads trying to suspend this one.
+  art::Thread* thread;
+  {
+    thread = thread_list->SuspendThreadByPeer(jthread, true, false, &timed_out);
+  }
+  if (thread != NULL) {
+    {
+      art::ScopedObjectAccess soa(env);
+      thread->SetThreadName(name.c_str());
+    }
+    thread_list->Resume(thread, false);
+  } else if (timed_out) {
+    LOG(ERROR) << "Trying to set thread name to '" << name.c_str() << "' failed as the thread "
+        "failed to suspend within a generous timeout.";
+  }
+}
+
+JNIEXPORT jint JVM_IHashCode(JNIEnv* env ATTRIBUTE_UNUSED,
+                             jobject javaObject ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "JVM_IHashCode is not implemented";
+  return 0;
+}
+
+JNIEXPORT jlong JVM_NanoTime(JNIEnv* env ATTRIBUTE_UNUSED, jclass unused ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "JVM_NanoTime is not implemented";
+  return 0L;
+}
+
+JNIEXPORT void JVM_ArrayCopy(JNIEnv* /* env */, jclass /* unused */, jobject /* javaSrc */,
+                             jint /* srcPos */, jobject /* javaDst */, jint /* dstPos */,
+                             jint /* length */) {
+  UNIMPLEMENTED(FATAL) << "JVM_ArrayCopy is not implemented";
+}
+
+JNIEXPORT jint JVM_FindSignal(const char* name ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "JVM_FindSignal is not implemented";
+  return 0;
+}
+
+JNIEXPORT void* JVM_RegisterSignal(jint signum ATTRIBUTE_UNUSED, void* handler ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "JVM_RegisterSignal is not implemented";
+  return nullptr;
+}
+
+JNIEXPORT jboolean JVM_RaiseSignal(jint signum ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "JVM_RaiseSignal is not implemented";
+  return JNI_FALSE;
+}
+
+JNIEXPORT __attribute__((noreturn)) void JVM_Halt(jint code) {
+  exit(code);
+}
+
+JNIEXPORT jboolean JVM_IsNaN(jdouble d) {
+  return isnan(d);
+}
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index dfd783b..2b92303 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -41,15 +41,13 @@
                                                     // Runtime::Abort
 }
 
-ParsedOptions* ParsedOptions::Create(const RuntimeOptions& options, bool ignore_unrecognized,
-                                     RuntimeArgumentMap* runtime_options) {
+bool ParsedOptions::Parse(const RuntimeOptions& options,
+                          bool ignore_unrecognized,
+                          RuntimeArgumentMap* runtime_options) {
   CHECK(runtime_options != nullptr);
 
-  std::unique_ptr<ParsedOptions> parsed(new ParsedOptions());
-  if (parsed->Parse(options, ignore_unrecognized, runtime_options)) {
-    return parsed.release();
-  }
-  return nullptr;
+  ParsedOptions parser;
+  return parser.DoParse(options, ignore_unrecognized, runtime_options);
 }
 
 using RuntimeParser = CmdlineParser<RuntimeArgumentMap, RuntimeArgumentMap::Key>;
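
With the new shape, callers no longer receive a heap-allocated ParsedOptions. A minimal sketch of the updated call site (the option string is illustrative):

    RuntimeOptions options;
    options.push_back(std::make_pair("-Xmx64m", nullptr));
    RuntimeArgumentMap runtime_options;
    if (!ParsedOptions::Parse(options, /*ignore_unrecognized*/ false, &runtime_options)) {
      LOG(ERROR) << "Failed to parse runtime options.";
    }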
@@ -164,6 +162,9 @@
       .Define("-Xjitwarmupthreshold:_")
           .WithType<unsigned int>()
           .IntoKey(M::JITWarmupThreshold)
+      .Define("-Xjitsaveprofilinginfo")
+          .WithValue(true)
+          .IntoKey(M::JITSaveProfilingInfo)
       .Define("-XX:HspaceCompactForOOMMinIntervalMs=_")  // in ms
           .WithType<MillisecondsToNanoseconds>()  // store as ns
           .IntoKey(M::HSpaceCompactForOOMMinIntervalsMs)
@@ -398,14 +399,16 @@
   //  gLogVerbosity.oat = true;  // TODO: don't check this in!
   //  gLogVerbosity.profiler = true;  // TODO: don't check this in!
   //  gLogVerbosity.signals = true;  // TODO: don't check this in!
+  //  gLogVerbosity.simulator = true;  // TODO: don't check this in!
   //  gLogVerbosity.startup = true;  // TODO: don't check this in!
   //  gLogVerbosity.third_party_jni = true;  // TODO: don't check this in!
   //  gLogVerbosity.threads = true;  // TODO: don't check this in!
   //  gLogVerbosity.verifier = true;  // TODO: don't check this in!
 }
 
-bool ParsedOptions::Parse(const RuntimeOptions& options, bool ignore_unrecognized,
-                          RuntimeArgumentMap* runtime_options) {
+bool ParsedOptions::DoParse(const RuntimeOptions& options,
+                            bool ignore_unrecognized,
+                            RuntimeArgumentMap* runtime_options) {
   for (size_t i = 0; i < options.size(); ++i) {
     if (true && options[0].first == "-Xzygote") {
       LOG(INFO) << "option[" << i << "]=" << options[i].first;
@@ -556,7 +559,9 @@
     args.Set(M::Image, image);
   }
 
-  if (args.GetOrDefault(M::HeapGrowthLimit) == 0u) {  // 0 means no growth limit
+  // 0 means no growth limit, and the growth limit should always be <= the heap size.
+  if (args.GetOrDefault(M::HeapGrowthLimit) <= 0u ||
+      args.GetOrDefault(M::HeapGrowthLimit) > args.GetOrDefault(M::MemoryMaximumSize)) {
     args.Set(M::HeapGrowthLimit, args.GetOrDefault(M::MemoryMaximumSize));
   }
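
Note on the hunk above: parsing no longer produces an owned ParsedOptions object; it is now a static bool-returning call whose only output is the RuntimeArgumentMap. A before/after sketch of a call site, using only the signatures visible in this diff:

    // Before: callers received (and owned) a ParsedOptions, null on failure.
    RuntimeArgumentMap map;
    std::unique_ptr<ParsedOptions> parsed(ParsedOptions::Create(options, false, &map));
    if (parsed.get() == nullptr) {
      // Parsing failed.
    }

    // After: nothing escapes; success is just the return value.
    RuntimeArgumentMap map;
    if (!ParsedOptions::Parse(options, /* ignore_unrecognized */ false, &map)) {
      // Parsing failed.
    }
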
 
diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h
index 529dd5c..5974fb6 100644
--- a/runtime/parsed_options.h
+++ b/runtime/parsed_options.h
@@ -50,8 +50,9 @@
   static std::unique_ptr<RuntimeParser> MakeParser(bool ignore_unrecognized);
 
   // returns true if parsing succeeds, and stores the resulting options into runtime_options
-  static ParsedOptions* Create(const RuntimeOptions& options, bool ignore_unrecognized,
-                               RuntimeArgumentMap* runtime_options);
+  static bool Parse(const RuntimeOptions& options,
+                    bool ignore_unrecognized,
+                    RuntimeArgumentMap* runtime_options);
 
   bool (*hook_is_sensitive_thread_)();
   jint (*hook_vfprintf_)(FILE* stream, const char* format, va_list ap);
@@ -72,8 +73,9 @@
   void Exit(int status);
   void Abort();
 
-  bool Parse(const RuntimeOptions& options,  bool ignore_unrecognized,
-             RuntimeArgumentMap* runtime_options);
+  bool DoParse(const RuntimeOptions& options,
+               bool ignore_unrecognized,
+               RuntimeArgumentMap* runtime_options);
 };
 
 }  // namespace art
diff --git a/runtime/parsed_options_test.cc b/runtime/parsed_options_test.cc
index a8575de..5b90c6a 100644
--- a/runtime/parsed_options_test.cc
+++ b/runtime/parsed_options_test.cc
@@ -18,6 +18,8 @@
 
 #include <memory>
 
+#include "arch/instruction_set.h"
+#include "base/stringprintf.h"
 #include "common_runtime_test.h"
 
 namespace art {
@@ -34,18 +36,28 @@
   void* test_abort = reinterpret_cast<void*>(0xb);
   void* test_exit = reinterpret_cast<void*>(0xc);
 
-  std::string lib_core(CommonRuntimeTest::GetLibCoreDexFileName());
-
   std::string boot_class_path;
+  std::string class_path;
   boot_class_path += "-Xbootclasspath:";
-  boot_class_path += lib_core;
+
+  bool first_dex_file = true;
+  for (const std::string &dex_file_name :
+           CommonRuntimeTest::GetLibCoreDexFileNames()) {
+    if (!first_dex_file) {
+      class_path += ":";
+    } else {
+      first_dex_file = false;
+    }
+    class_path += dex_file_name;
+  }
+  boot_class_path += class_path;
 
   RuntimeOptions options;
   options.push_back(std::make_pair(boot_class_path.c_str(), nullptr));
   options.push_back(std::make_pair("-classpath", nullptr));
-  options.push_back(std::make_pair(lib_core.c_str(), nullptr));
+  options.push_back(std::make_pair(class_path.c_str(), nullptr));
   options.push_back(std::make_pair("-cp", nullptr));
-  options.push_back(std::make_pair(lib_core.c_str(), nullptr));
+  options.push_back(std::make_pair(class_path.c_str(), nullptr));
   options.push_back(std::make_pair("-Ximage:boot_image", nullptr));
   options.push_back(std::make_pair("-Xcheck:jni", nullptr));
   options.push_back(std::make_pair("-Xms2048", nullptr));
@@ -60,8 +72,8 @@
   options.push_back(std::make_pair("exit", test_exit));
 
   RuntimeArgumentMap map;
-  std::unique_ptr<ParsedOptions> parsed(ParsedOptions::Create(options, false, &map));
-  ASSERT_TRUE(parsed.get() != nullptr);
+  bool parsed = ParsedOptions::Parse(options, false, &map);
+  ASSERT_TRUE(parsed);
   ASSERT_NE(0u, map.Size());
 
   using Opt = RuntimeArgumentMap;
@@ -69,8 +81,8 @@
 #define EXPECT_PARSED_EQ(expected, actual_key) EXPECT_EQ(expected, map.GetOrDefault(actual_key))
 #define EXPECT_PARSED_EXISTS(actual_key) EXPECT_TRUE(map.Exists(actual_key))
 
-  EXPECT_PARSED_EQ(lib_core, Opt::BootClassPath);
-  EXPECT_PARSED_EQ(lib_core, Opt::ClassPath);
+  EXPECT_PARSED_EQ(class_path, Opt::BootClassPath);
+  EXPECT_PARSED_EQ(class_path, Opt::ClassPath);
   EXPECT_PARSED_EQ(std::string("boot_image"), Opt::Image);
   EXPECT_PARSED_EXISTS(Opt::CheckJni);
   EXPECT_PARSED_EQ(2048U, Opt::MemoryInitialSize);
@@ -87,6 +99,8 @@
   EXPECT_FALSE(VLOG_IS_ON(jdwp));
   EXPECT_TRUE(VLOG_IS_ON(jni));
   EXPECT_FALSE(VLOG_IS_ON(monitor));
+  EXPECT_FALSE(VLOG_IS_ON(signals));
+  EXPECT_FALSE(VLOG_IS_ON(simulator));
   EXPECT_FALSE(VLOG_IS_ON(startup));
   EXPECT_FALSE(VLOG_IS_ON(third_party_jni));
   EXPECT_FALSE(VLOG_IS_ON(threads));
@@ -102,8 +116,8 @@
   options.push_back(std::make_pair("-Xgc:MC", nullptr));
 
   RuntimeArgumentMap map;
-  std::unique_ptr<ParsedOptions> parsed(ParsedOptions::Create(options, false, &map));
-  ASSERT_TRUE(parsed.get() != nullptr);
+  bool parsed = ParsedOptions::Parse(options, false, &map);
+  ASSERT_TRUE(parsed);
   ASSERT_NE(0u, map.Size());
 
   using Opt = RuntimeArgumentMap;
@@ -111,6 +125,40 @@
   EXPECT_TRUE(map.Exists(Opt::GcOption));
 
   XGcOption xgc = map.GetOrDefault(Opt::GcOption);
-  EXPECT_EQ(gc::kCollectorTypeMC, xgc.collector_type_);}
+  EXPECT_EQ(gc::kCollectorTypeMC, xgc.collector_type_);
+}
+
+TEST_F(ParsedOptionsTest, ParsedOptionsInstructionSet) {
+  using Opt = RuntimeArgumentMap;
+
+  {
+    // Nothing set, should be kRuntimeISA.
+    RuntimeOptions options;
+    RuntimeArgumentMap map;
+    bool parsed = ParsedOptions::Parse(options, false, &map);
+    ASSERT_TRUE(parsed);
+    InstructionSet isa = map.GetOrDefault(Opt::ImageInstructionSet);
+    EXPECT_EQ(kRuntimeISA, isa);
+  }
+
+  const char* isa_strings[] = { "arm", "arm64", "x86", "x86_64", "mips", "mips64" };
+  InstructionSet ISAs[] = { InstructionSet::kArm,
+                            InstructionSet::kArm64,
+                            InstructionSet::kX86,
+                            InstructionSet::kX86_64,
+                            InstructionSet::kMips,
+                            InstructionSet::kMips64 };
+  static_assert(arraysize(isa_strings) == arraysize(ISAs), "Need same amount.");
+
+  for (size_t i = 0; i < arraysize(isa_strings); ++i) {
+    RuntimeOptions options;
+    options.push_back(std::make_pair("imageinstructionset", isa_strings[i]));
+    RuntimeArgumentMap map;
+    bool parsed = ParsedOptions::Parse(options, false, &map);
+    ASSERT_TRUE(parsed);
+    InstructionSet isa = map.GetOrDefault(Opt::ImageInstructionSet);
+    EXPECT_EQ(ISAs[i], isa);
+  }
+}
 
 }  // namespace art
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index 57472ad..4d9ca6d 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -79,7 +79,7 @@
             mirror::Method::CreateFromArtMethod(soa.Self(), method)));
     // Now adds all interfaces virtual methods.
     for (mirror::Class* interface : interfaces) {
-      for (auto& m : interface->GetVirtualMethods(sizeof(void*))) {
+      for (auto& m : interface->GetDeclaredVirtualMethods(sizeof(void*))) {
         soa.Env()->SetObjectArrayElement(
             proxyClassMethods, array_index++, soa.AddLocalReference<jobject>(
                 mirror::Method::CreateFromArtMethod(soa.Self(), &m)));
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 837662d..6cea902 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -51,6 +51,23 @@
   kIntrinsicMinMaxLong,
   kIntrinsicMinMaxFloat,
   kIntrinsicMinMaxDouble,
+  kIntrinsicCos,
+  kIntrinsicSin,
+  kIntrinsicAcos,
+  kIntrinsicAsin,
+  kIntrinsicAtan,
+  kIntrinsicAtan2,
+  kIntrinsicCbrt,
+  kIntrinsicCosh,
+  kIntrinsicExp,
+  kIntrinsicExpm1,
+  kIntrinsicHypot,
+  kIntrinsicLog,
+  kIntrinsicLog10,
+  kIntrinsicNextAfter,
+  kIntrinsicSinh,
+  kIntrinsicTan,
+  kIntrinsicTanh,
   kIntrinsicSqrt,
   kIntrinsicCeil,
   kIntrinsicFloor,
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 1552318..9cb37ee 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -283,7 +283,12 @@
         prev_shadow_frame_(nullptr),
         stacked_shadow_frame_pushed_(false),
         single_frame_deopt_(single_frame),
-        single_frame_done_(false) {
+        single_frame_done_(false),
+        single_frame_deopt_method_(nullptr) {
+  }
+
+  ArtMethod* GetSingleFrameDeoptMethod() const {
+    return single_frame_deopt_method_;
   }
 
   bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -356,6 +361,7 @@
         // Single-frame deopt ends at the first non-inlined frame and needs to store that method.
         exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method));
         single_frame_done_ = true;
+        single_frame_deopt_method_ = method;
       }
       return true;
     }
@@ -586,6 +592,7 @@
   bool stacked_shadow_frame_pushed_;
   const bool single_frame_deopt_;
   bool single_frame_done_;
+  ArtMethod* single_frame_deopt_method_;
 
   DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor);
 };
@@ -614,6 +621,14 @@
   DeoptimizeStackVisitor visitor(self_, context_, this, true);
   visitor.WalkStack(true);
 
+  // Compiled code made an explicit deoptimization. Transfer control to the
+  // interpreter and clear the counter so the method can be JITted again.
+  ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod();
+  DCHECK(deopt_method != nullptr);
+  deopt_method->ClearCounter();
+  Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+      deopt_method, GetQuickToInterpreterBridge());
+
   // PC needs to be of the quick-to-interpreter bridge.
   int32_t offset;
   #ifdef __LP64__
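
The epilogue added above hands a single-frame-deoptimized method back to the JIT pipeline in two steps; the same sequence with explanatory comments (names exactly as in the hunk):

    // The visitor recorded the first non-inlined method while walking the stack.
    ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod();
    DCHECK(deopt_method != nullptr);
    // Reset hotness so the JIT treats the method as cold and can recompile it later.
    deopt_method->ClearCounter();
    // Until then, route all calls through the interpreter bridge.
    Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
        deopt_method, GetQuickToInterpreterBridge());
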
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 7de6c06..19cf759 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -28,12 +28,12 @@
 
 namespace art {
 
-template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kMaybeDuringStartup>
+template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kAlwaysUpdateField>
 inline MirrorType* ReadBarrier::Barrier(
     mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) {
   constexpr bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
   if (with_read_barrier && kUseBakerReadBarrier) {
-    // The higher bits of the rb ptr, rb_ptr_high_bits (must be zero)
+    // The higher bits of the rb_ptr, rb_ptr_high_bits (must be zero)
     // is used to create artificial data dependency from the is_gray
     // load to the ref field (ptr) load to avoid needing a load-load
     // barrier between the two.
@@ -42,9 +42,16 @@
     ref_addr = reinterpret_cast<mirror::HeapReference<MirrorType>*>(
         rb_ptr_high_bits | reinterpret_cast<uintptr_t>(ref_addr));
     MirrorType* ref = ref_addr->AsMirrorPtr();
+    MirrorType* old_ref = ref;
     if (is_gray) {
       // Slow-path.
       ref = reinterpret_cast<MirrorType*>(Mark(ref));
+      // If kAlwaysUpdateField is true, update the field atomically. This may fail if the
+      // mutator updates it before us, but that is OK.
+      if (kAlwaysUpdateField && ref != old_ref) {
+        obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(
+            offset, old_ref, ref);
+      }
     }
     if (kEnableReadBarrierInvariantChecks) {
       CHECK_EQ(rb_ptr_high_bits, 0U) << obj << " rb_ptr=" << obj->GetReadBarrierPointer();
@@ -75,7 +82,7 @@
   }
 }
 
-template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kMaybeDuringStartup>
+template <typename MirrorType, ReadBarrierOption kReadBarrierOption>
 inline MirrorType* ReadBarrier::BarrierForRoot(MirrorType** root,
                                                GcRootSource* gc_root_source) {
   MirrorType* ref = *root;
@@ -112,7 +119,7 @@
 }
 
 // TODO: Reduce copy paste
-template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kMaybeDuringStartup>
+template <typename MirrorType, ReadBarrierOption kReadBarrierOption>
 inline MirrorType* ReadBarrier::BarrierForRoot(mirror::CompressedReference<MirrorType>* root,
                                                GcRootSource* gc_root_source) {
   MirrorType* ref = root->AsMirrorPtr();
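
The kAlwaysUpdateField branch above makes a Baker-style read double as a heal of the field. A condensed sketch with explanatory comments (it elides the high-bits data-dependency trick, so it is illustrative rather than drop-in):

    MirrorType* ref = ref_addr->AsMirrorPtr();
    MirrorType* old_ref = ref;
    if (is_gray) {
      // Slow path: Mark() returns the to-space copy of the reference.
      ref = reinterpret_cast<MirrorType*>(Mark(ref));
      if (kAlwaysUpdateField && ref != old_ref) {
        // Best effort: if a mutator already stored the (identical) to-space pointer,
        // this CAS fails harmlessly -- the field is up to date either way.
        obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(offset, old_ref, ref);
      }
    }
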
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index e7ad731..3169a8b 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -43,26 +43,24 @@
   // Enable the read barrier checks.
   static constexpr bool kEnableReadBarrierInvariantChecks = true;
 
-  // It's up to the implementation whether the given field gets
-  // updated whereas the return value must be an updated reference.
+  // Unless kAlwaysUpdateField is true, it's up to the implementation whether the given
+  // field gets updated; the return value must always be an updated reference.
   template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
-            bool kMaybeDuringStartup = false>
+            bool kAlwaysUpdateField = false>
   ALWAYS_INLINE static MirrorType* Barrier(
       mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // It's up to the implementation whether the given root gets updated
   // whereas the return value must be an updated reference.
-  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
-            bool kMaybeDuringStartup = false>
+  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE static MirrorType* BarrierForRoot(MirrorType** root,
                                                   GcRootSource* gc_root_source = nullptr)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // It's up to the implementation whether the given root gets updated
   // whereas the return value must be an updated reference.
-  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
-            bool kMaybeDuringStartup = false>
+  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE static MirrorType* BarrierForRoot(mirror::CompressedReference<MirrorType>* root,
                                                   GcRootSource* gc_root_source = nullptr)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -82,7 +80,8 @@
   static void AssertToSpaceInvariant(GcRootSource* gc_root_source, mirror::Object* ref)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static mirror::Object* Mark(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
+  ALWAYS_INLINE static mirror::Object* Mark(mirror::Object* obj)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   static mirror::Object* WhitePtr() {
     return reinterpret_cast<mirror::Object*>(white_ptr_);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index a210aa8..eeaadd4 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -218,6 +218,7 @@
   if (is_native_bridge_loaded_) {
     UnloadNativeBridge();
   }
+
   if (dump_gc_performance_on_shutdown_) {
     // This can't be called from the Heap destructor below because it
     // could call RosAlloc::InspectAll() which needs the thread_list
@@ -431,14 +432,25 @@
   GetLambdaBoxTable()->SweepWeakBoxedLambdas(visitor);
 }
 
-bool Runtime::Create(const RuntimeOptions& options, bool ignore_unrecognized) {
+bool Runtime::ParseOptions(const RuntimeOptions& raw_options,
+                           bool ignore_unrecognized,
+                           RuntimeArgumentMap* runtime_options) {
+  InitLogging(/* argv */ nullptr);  // Calls Locks::Init() as a side effect.
+  bool parsed = ParsedOptions::Parse(raw_options, ignore_unrecognized, runtime_options);
+  if (!parsed) {
+    LOG(ERROR) << "Failed to parse options";
+    return false;
+  }
+  return true;
+}
+
+bool Runtime::Create(RuntimeArgumentMap&& runtime_options) {
   // TODO: acquire a static mutex on Runtime to avoid racing.
   if (Runtime::instance_ != nullptr) {
     return false;
   }
-  InitLogging(nullptr);  // Calls Locks::Init() as a side effect.
   instance_ = new Runtime;
-  if (!instance_->Init(options, ignore_unrecognized)) {
+  if (!instance_->Init(std::move(runtime_options))) {
     // TODO: Currently deleting the instance will abort the runtime on destruction. Now This will
     // leak memory, instead. Fix the destructor. b/19100793.
     // delete instance_;
@@ -448,6 +460,12 @@
   return true;
 }
 
+bool Runtime::Create(const RuntimeOptions& raw_options, bool ignore_unrecognized) {
+  RuntimeArgumentMap runtime_options;
+  return ParseOptions(raw_options, ignore_unrecognized, &runtime_options) &&
+      Create(std::move(runtime_options));
+}
+
 static jobject CreateSystemClassLoader(Runtime* runtime) {
   if (runtime->IsAotCompiler() && !runtime->GetCompilerCallbacks()->IsBootImage()) {
     return nullptr;
@@ -575,6 +593,7 @@
       PreInitializeNativeBridge(".");
     }
     InitNonZygoteOrPostFork(self->GetJniEnv(),
+                            /* is_system_server */ false,
                             NativeBridgeAction::kInitialize,
                             GetInstructionSetString(kRuntimeISA));
   }
@@ -601,7 +620,6 @@
       LOG(INFO) << "Failed to access the profile file. Profiler disabled.";
       return true;
     }
-    StartProfiler(profile_output_filename_.c_str());
   }
 
   if (trace_config_.get() != nullptr && trace_config_->trace_file != "") {
@@ -665,7 +683,8 @@
 #endif
 }
 
-void Runtime::InitNonZygoteOrPostFork(JNIEnv* env, NativeBridgeAction action, const char* isa) {
+void Runtime::InitNonZygoteOrPostFork(
+    JNIEnv* env, bool is_system_server, NativeBridgeAction action, const char* isa) {
   is_zygote_ = false;
 
   if (is_native_bridge_loaded_) {
@@ -687,7 +706,7 @@
   // before fork aren't attributed to an app.
   heap_->ResetGcPerformanceInfo();
 
-  if (!safe_mode_ && jit_options_->UseJIT() && jit_.get() == nullptr) {
+  if (!is_system_server && !safe_mode_ && jit_options_->UseJIT() && jit_.get() == nullptr) {
     // Note that when running ART standalone (not zygote, nor zygote fork),
     // the jit may have already been created.
     CreateJit();
@@ -773,7 +792,7 @@
   std::unique_ptr<const OatFile> oat_file(
       OatFile::OpenWithElfFile(elf_file.release(), oat_location, nullptr, &error_msg));
   if (oat_file == nullptr) {
-    LOG(INFO) << "Unable to use '" << oat_filename << "' because " << error_msg;
+    LOG(WARNING) << "Unable to use '" << oat_filename << "' because " << error_msg;
     return false;
   }
 
@@ -827,21 +846,14 @@
   sentinel_ = GcRoot<mirror::Object>(sentinel);
 }
 
-bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) {
+bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) {
+  RuntimeArgumentMap runtime_options(std::move(runtime_options_in));
   ATRACE_BEGIN("Runtime::Init");
   CHECK_EQ(sysconf(_SC_PAGE_SIZE), kPageSize);
 
   MemMap::Init();
 
   using Opt = RuntimeArgumentMap;
-  RuntimeArgumentMap runtime_options;
-  std::unique_ptr<ParsedOptions> parsed_options(
-      ParsedOptions::Create(raw_options, ignore_unrecognized, &runtime_options));
-  if (parsed_options.get() == nullptr) {
-    LOG(ERROR) << "Failed to parse options";
-    ATRACE_END();
-    return false;
-  }
   VLOG(startup) << "Runtime::Init -verbose:startup enabled";
 
   QuasiAtomic::Startup();
@@ -1199,18 +1211,31 @@
   // First set up JniConstants, which is used by both the runtime's built-in native
   // methods and libcore.
   JniConstants::init(env);
-  WellKnownClasses::Init(env);
 
   // Then set up the native methods provided by the runtime itself.
   RegisterRuntimeNativeMethods(env);
 
-  // Then set up libcore, which is just a regular JNI library with a regular JNI_OnLoad.
-  // Most JNI libraries can just use System.loadLibrary, but libcore can't because it's
-  // the library that implements System.loadLibrary!
+  // Initialize classes used in JNI. The initialization requires runtime native
+  // methods to be loaded first.
+  WellKnownClasses::Init(env);
+
+  // Then set up libjavacore / libopenjdk, which are just regular JNI libraries with
+  // a regular JNI_OnLoad. Most JNI libraries can just use System.loadLibrary, but
+  // libcore can't because it's the library that implements System.loadLibrary!
   {
-    std::string reason;
-    if (!java_vm_->LoadNativeLibrary(env, "libjavacore.so", nullptr, &reason)) {
-      LOG(FATAL) << "LoadNativeLibrary failed for \"libjavacore.so\": " << reason;
+    std::string error_msg;
+    if (!java_vm_->LoadNativeLibrary(env, "libjavacore.so", nullptr,
+                                     /* is_shared_namespace */ false,
+                                     nullptr, nullptr, &error_msg)) {
+      LOG(FATAL) << "LoadNativeLibrary failed for \"libjavacore.so\": " << error_msg;
+    }
+  }
+  {
+    std::string error_msg;
+    if (!java_vm_->LoadNativeLibrary(env, "libopenjdk.so", nullptr,
+                                     /* is_shared_namespace */ false,
+                                     nullptr, nullptr, &error_msg)) {
+      LOG(FATAL) << "LoadNativeLibrary failed for \"libopenjdk.so\": " << error_msg;
     }
   }
 
@@ -1281,10 +1306,20 @@
 }
 
 void Runtime::DumpForSigQuit(std::ostream& os) {
+  // Dumping for SIGQUIT may cause deadlocks if the debugger is active. b/26118154
+  if (Dbg::IsDebuggerActive()) {
+    LOG(INFO) << "Skipping DumpForSigQuit due to active debugger";
+    return;
+  }
   GetClassLinker()->DumpForSigQuit(os);
   GetInternTable()->DumpForSigQuit(os);
   GetJavaVM()->DumpForSigQuit(os);
   GetHeap()->DumpForSigQuit(os);
+  if (GetJit() != nullptr) {
+    GetJit()->DumpForSigQuit(os);
+  } else {
+    os << "Running non JIT\n";
+  }
   TrackedAllocators::Dump(os);
   os << "\n";
 
@@ -1618,10 +1653,13 @@
   callee_save_methods_[type] = reinterpret_cast<uintptr_t>(method);
 }
 
-void Runtime::StartProfiler(const char* profile_output_filename) {
+void Runtime::RegisterAppInfo(const std::vector<std::string>& code_paths,
+                              const std::string& profile_output_filename) {
+  DCHECK(!profile_output_filename.empty());
+  if (jit_.get() != nullptr) {
+    jit_->SetDexLocationsForProfiling(code_paths);
+  }
   profile_output_filename_ = profile_output_filename;
-  profiler_started_ =
-      BackgroundMethodSamplingProfiler::Start(profile_output_filename_, profiler_options_);
 }
 
 // Transaction support.
@@ -1767,8 +1805,16 @@
   argv->push_back(feature_string);
 }
 
+void Runtime::MaybeSaveJitProfilingInfo() {
+  if (jit_.get() != nullptr && !profile_output_filename_.empty()) {
+    jit_->SaveProfilingInfo(profile_output_filename_);
+  }
+}
+
 void Runtime::UpdateProfilerState(int state) {
-  VLOG(profiler) << "Profiler state updated to " << state;
+  if (state == kProfileBackground) {
+    MaybeSaveJitProfilingInfo();
+  }
 }
 
 void Runtime::CreateJit() {
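
Runtime creation is now two-phase, so an embedder can parse first and construct later (or adjust the map in between). A minimal sketch built only from the signatures in this diff; it is exactly what the compatibility overload of Create() does:

    RuntimeArgumentMap runtime_options;
    if (!Runtime::ParseOptions(raw_options, /* ignore_unrecognized */ false, &runtime_options)) {
      return false;  // ParseOptions already logged the failure.
    }
    // ... optionally inspect or tweak runtime_options here ...
    return Runtime::Create(std::move(runtime_options));
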
diff --git a/runtime/runtime.h b/runtime/runtime.h
index d61663c..5df1ca9 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -84,6 +84,7 @@
 class MonitorPool;
 class NullPointerHandler;
 class OatFileManager;
+struct RuntimeArgumentMap;
 class SignalCatcher;
 class StackOverflowHandler;
 class SuspensionHandler;
@@ -112,8 +113,17 @@
 
 class Runtime {
  public:
+  // Parse raw runtime options.
+  static bool ParseOptions(const RuntimeOptions& raw_options,
+                           bool ignore_unrecognized,
+                           RuntimeArgumentMap* runtime_options);
+
   // Creates and initializes a new runtime.
-  static bool Create(const RuntimeOptions& options, bool ignore_unrecognized)
+  static bool Create(RuntimeArgumentMap&& runtime_options)
+      SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_);
+
+  // Creates and initializes a new runtime.
+  static bool Create(const RuntimeOptions& raw_options, bool ignore_unrecognized)
       SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_);
 
   // IsAotCompiler for compilers that don't have a running runtime. Only dex2oat currently.
@@ -447,7 +457,8 @@
 
   void PreZygoteFork();
   bool InitZygote();
-  void InitNonZygoteOrPostFork(JNIEnv* env, NativeBridgeAction action, const char* isa);
+  void InitNonZygoteOrPostFork(
+      JNIEnv* env, bool is_system_server, NativeBridgeAction action, const char* isa);
 
   const instrumentation::Instrumentation* GetInstrumentation() const {
     return &instrumentation_;
@@ -457,7 +468,8 @@
     return &instrumentation_;
   }
 
-  void StartProfiler(const char* profile_output_filename);
+  void RegisterAppInfo(const std::vector<std::string>& code_paths,
+                       const std::string& profile_output_filename);
   void UpdateProfilerState(int state);
 
   // Transaction support.
@@ -599,7 +611,7 @@
 
   void BlockSignals();
 
-  bool Init(const RuntimeOptions& options, bool ignore_unrecognized)
+  bool Init(RuntimeArgumentMap&& runtime_options)
       SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_);
   void InitNativeMethods() REQUIRES(!Locks::mutator_lock_);
   void InitThreadGroups(Thread* self);
@@ -608,12 +620,14 @@
   void StartDaemonThreads();
   void StartSignalCatcher();
 
+  void MaybeSaveJitProfilingInfo();
+
   // A pointer to the active runtime or null.
   static Runtime* instance_;
 
   // NOTE: these must match the gc::ProcessState values as they come directly from the framework.
   static constexpr int kProfileForground = 0;
-  static constexpr int kProfileBackgrouud = 1;
+  static constexpr int kProfileBackground = 1;
 
   // 64 bit so that we can share the same asm offsets for both 32 and 64 bits.
   uint64_t callee_save_methods_[kLastCalleeSaveType];
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 9051eda..5624285 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -71,6 +71,7 @@
 RUNTIME_OPTIONS_KEY (unsigned int,        JITWarmupThreshold,             jit::Jit::kDefaultWarmupThreshold)
 RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheInitialCapacity,    jit::JitCodeCache::kInitialCapacity)
 RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheMaxCapacity,        jit::JitCodeCache::kMaxCapacity)
+RUNTIME_OPTIONS_KEY (bool,                JITSaveProfilingInfo,           false)
 RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \
                                           HSpaceCompactForOOMMinIntervalsMs,\
                                                                           MsToNs(100 * 1000))  // 100s
diff --git a/runtime/runtime_options.h b/runtime/runtime_options.h
index 88ac00a..4610f6f 100644
--- a/runtime/runtime_options.h
+++ b/runtime/runtime_options.h
@@ -17,14 +17,13 @@
 #ifndef ART_RUNTIME_RUNTIME_OPTIONS_H_
 #define ART_RUNTIME_RUNTIME_OPTIONS_H_
 
-#include "runtime/base/variant_map.h"
-#include "cmdline/cmdline_types.h"  // TODO: don't need to include this file here
+#include "base/variant_map.h"
+#include "cmdline_types.h"  // TODO: don't need to include this file here
 
 // Map keys
 #include <vector>
 #include <string>
-#include "runtime/base/logging.h"
-#include "cmdline/unit.h"
+#include "base/logging.h"
 #include "jdwp/jdwp.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
diff --git a/runtime/safe_map.h b/runtime/safe_map.h
index 7ac17b6..4e62dda 100644
--- a/runtime/safe_map.h
+++ b/runtime/safe_map.h
@@ -92,7 +92,7 @@
     DCHECK(result.second);  // Check we didn't accidentally overwrite an existing value.
     return result.first;
   }
-  iterator Put(const K& k, const V&& v) {
+  iterator Put(const K& k, V&& v) {
     std::pair<iterator, bool> result = map_.emplace(k, std::move(v));
     DCHECK(result.second);  // Check we didn't accidentally overwrite an existing value.
     return result.first;
@@ -105,7 +105,7 @@
     DCHECK(pos == map_.begin() || map_.key_comp()((--iterator(pos))->first, k));
     return map_.emplace_hint(pos, k, v);
   }
-  iterator PutBefore(iterator pos, const K& k, const V&& v) {
+  iterator PutBefore(iterator pos, const K& k, V&& v) {
     // Check that we're using the correct position and the key is not in the map.
     DCHECK(pos == map_.end() || map_.key_comp()(k, pos->first));
     DCHECK(pos == map_.begin() || map_.key_comp()((--iterator(pos))->first, k));
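
The signature change in this hunk fixes a silent pessimization: a const V&& parameter binds rvalues but makes them const, and std::move on a const object selects the copy constructor, so the "move" overloads were copying all along. A self-contained illustration (the types here are hypothetical, not from safe_map.h):

    #include <string>
    #include <utility>

    // Binds an rvalue, but const blocks the move: std::move(s) yields a
    // const std::string&&, so overload resolution picks the copy constructor.
    std::string CopyingSink(const std::string&& s) {
      return std::string(std::move(s));  // Copies.
    }

    // Without const, this is a genuine move.
    std::string MovingSink(std::string&& s) {
      return std::string(std::move(s));  // Moves.
    }
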
diff --git a/runtime/simulator/Android.mk b/runtime/simulator/Android.mk
new file mode 100644
index 0000000..c154eb6
--- /dev/null
+++ b/runtime/simulator/Android.mk
@@ -0,0 +1,105 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(call my-dir)
+
+include art/build/Android.common_build.mk
+
+LIBART_SIMULATOR_SRC_FILES := \
+  code_simulator.cc \
+  code_simulator_arm64.cc
+
+# $(1): target or host
+# $(2): ndebug or debug
+define build-libart-simulator
+  ifneq ($(1),target)
+    ifneq ($(1),host)
+      $$(error expected target or host for argument 1, received $(1))
+    endif
+  endif
+  ifneq ($(2),ndebug)
+    ifneq ($(2),debug)
+      $$(error expected ndebug or debug for argument 2, received $(2))
+    endif
+  endif
+
+  art_target_or_host := $(1)
+  art_ndebug_or_debug := $(2)
+
+  include $(CLEAR_VARS)
+  ifeq ($$(art_target_or_host),host)
+     LOCAL_IS_HOST_MODULE := true
+  endif
+  LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
+  ifeq ($$(art_ndebug_or_debug),ndebug)
+    LOCAL_MODULE := libart-simulator
+  else # debug
+    LOCAL_MODULE := libartd-simulator
+  endif
+
+  LOCAL_MODULE_TAGS := optional
+  LOCAL_MODULE_CLASS := SHARED_LIBRARIES
+
+  LOCAL_SRC_FILES := $$(LIBART_SIMULATOR_SRC_FILES)
+
+  ifeq ($$(art_target_or_host),target)
+    $(call set-target-local-clang-vars)
+    $(call set-target-local-cflags-vars,$(2))
+  else # host
+    LOCAL_CLANG := $(ART_HOST_CLANG)
+    LOCAL_LDLIBS := $(ART_HOST_LDLIBS)
+    LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
+    LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
+    ifeq ($$(art_ndebug_or_debug),debug)
+      LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+    else
+      LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+    endif
+  endif
+
+  LOCAL_SHARED_LIBRARIES += liblog
+  ifeq ($$(art_ndebug_or_debug),debug)
+    LOCAL_SHARED_LIBRARIES += libartd
+  else
+    LOCAL_SHARED_LIBRARIES += libart
+  endif
+
+  LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime
+  LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)
+  LOCAL_MULTILIB := both
+
+  LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
+  LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
+  LOCAL_NATIVE_COVERAGE := $(ART_COVERAGE)
+  # For simulator_arm64.
+  ifeq ($$(art_ndebug_or_debug),debug)
+     LOCAL_SHARED_LIBRARIES += libvixld
+  else
+     LOCAL_SHARED_LIBRARIES += libvixl
+  endif
+  ifeq ($$(art_target_or_host),target)
+    include $(BUILD_SHARED_LIBRARY)
+  else # host
+    include $(BUILD_HOST_SHARED_LIBRARY)
+  endif
+endef
+
+ifeq ($(ART_BUILD_HOST_NDEBUG),true)
+  $(eval $(call build-libart-simulator,host,ndebug))
+endif
+ifeq ($(ART_BUILD_HOST_DEBUG),true)
+  $(eval $(call build-libart-simulator,host,debug))
+endif
diff --git a/runtime/simulator/code_simulator.cc b/runtime/simulator/code_simulator.cc
new file mode 100644
index 0000000..1a11160
--- /dev/null
+++ b/runtime/simulator/code_simulator.cc
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "simulator/code_simulator.h"
+#include "simulator/code_simulator_arm64.h"
+
+namespace art {
+
+CodeSimulator* CodeSimulator::CreateCodeSimulator(InstructionSet target_isa) {
+  switch (target_isa) {
+    case kArm64:
+      return arm64::CodeSimulatorArm64::CreateCodeSimulatorArm64();
+    default:
+      return nullptr;
+  }
+}
+
+CodeSimulator* CreateCodeSimulator(InstructionSet target_isa) {
+  return CodeSimulator::CreateCodeSimulator(target_isa);
+}
+
+}  // namespace art
diff --git a/runtime/simulator/code_simulator.h b/runtime/simulator/code_simulator.h
new file mode 100644
index 0000000..bd48909
--- /dev/null
+++ b/runtime/simulator/code_simulator.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_H_
+#define ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_H_
+
+#include "arch/instruction_set.h"
+
+namespace art {
+
+class CodeSimulator {
+ public:
+  CodeSimulator() {}
+  virtual ~CodeSimulator() {}
+  // Returns a null pointer if a simulator cannot be found for target_isa.
+  static CodeSimulator* CreateCodeSimulator(InstructionSet target_isa);
+
+  virtual void RunFrom(intptr_t code_buffer) = 0;
+
+  // Get return value according to C ABI.
+  virtual bool GetCReturnBool() const = 0;
+  virtual int32_t GetCReturnInt32() const = 0;
+  virtual int64_t GetCReturnInt64() const = 0;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(CodeSimulator);
+};
+
+extern "C" CodeSimulator* CreateCodeSimulator(InstructionSet target_isa);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_H_
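
A hedged usage sketch of the interface above: ask the factory for a simulator for the target ISA, fall back to native execution when it returns null, and read results through the C-ABI getters (the fallback policy and buffer handling here are illustrative assumptions, not part of this change):

    #include <cstdint>
    #include <memory>
    #include "simulator/code_simulator.h"

    bool TryRunOnSimulator(art::InstructionSet target_isa, intptr_t code_buffer, int32_t* result) {
      std::unique_ptr<art::CodeSimulator> sim(
          art::CodeSimulator::CreateCodeSimulator(target_isa));
      if (sim == nullptr) {
        return false;  // No simulator for this ISA; the caller runs the code natively.
      }
      sim->RunFrom(code_buffer);         // Simulate from the code entry point.
      *result = sim->GetCReturnInt32();  // Return value per the C ABI (w0 on arm64).
      return true;
    }
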
diff --git a/runtime/simulator/code_simulator_arm64.cc b/runtime/simulator/code_simulator_arm64.cc
new file mode 100644
index 0000000..39dfa6d
--- /dev/null
+++ b/runtime/simulator/code_simulator_arm64.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "simulator/code_simulator_arm64.h"
+
+namespace art {
+namespace arm64 {
+
+// VIXL has not been tested on 32-bit architectures, so vixl::Simulator is not always
+// available. To avoid linker errors on those architectures, each of the following
+// methods first checks the compile-time constant `kCanSimulate`.
+// TODO: when vixl::Simulator is always available, remove these checks.
+
+CodeSimulatorArm64* CodeSimulatorArm64::CreateCodeSimulatorArm64() {
+  if (kCanSimulate) {
+    return new CodeSimulatorArm64();
+  } else {
+    return nullptr;
+  }
+}
+
+CodeSimulatorArm64::CodeSimulatorArm64()
+    : CodeSimulator(), decoder_(nullptr), simulator_(nullptr) {
+  DCHECK(kCanSimulate);
+  decoder_ = new vixl::Decoder();
+  simulator_ = new vixl::Simulator(decoder_);
+}
+
+CodeSimulatorArm64::~CodeSimulatorArm64() {
+  DCHECK(kCanSimulate);
+  delete simulator_;
+  delete decoder_;
+}
+
+void CodeSimulatorArm64::RunFrom(intptr_t code_buffer) {
+  DCHECK(kCanSimulate);
+  simulator_->RunFrom(reinterpret_cast<const vixl::Instruction*>(code_buffer));
+}
+
+bool CodeSimulatorArm64::GetCReturnBool() const {
+  DCHECK(kCanSimulate);
+  return simulator_->wreg(0);
+}
+
+int32_t CodeSimulatorArm64::GetCReturnInt32() const {
+  DCHECK(kCanSimulate);
+  return simulator_->wreg(0);
+}
+
+int64_t CodeSimulatorArm64::GetCReturnInt64() const {
+  DCHECK(kCanSimulate);
+  return simulator_->xreg(0);
+}
+
+}  // namespace arm64
+}  // namespace art
diff --git a/runtime/simulator/code_simulator_arm64.h b/runtime/simulator/code_simulator_arm64.h
new file mode 100644
index 0000000..10fceb9
--- /dev/null
+++ b/runtime/simulator/code_simulator_arm64.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_ARM64_H_
+#define ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_ARM64_H_
+
+#include "memory"
+#include "simulator/code_simulator.h"
+// TODO: make vixl clean wrt -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "vixl/a64/simulator-a64.h"
+#pragma GCC diagnostic pop
+
+namespace art {
+namespace arm64 {
+
+class CodeSimulatorArm64 : public CodeSimulator {
+ public:
+  static CodeSimulatorArm64* CreateCodeSimulatorArm64();
+  virtual ~CodeSimulatorArm64();
+
+  void RunFrom(intptr_t code_buffer) OVERRIDE;
+
+  bool GetCReturnBool() const OVERRIDE;
+  int32_t GetCReturnInt32() const OVERRIDE;
+  int64_t GetCReturnInt64() const OVERRIDE;
+
+ private:
+  CodeSimulatorArm64();
+
+  vixl::Decoder* decoder_;
+  vixl::Simulator* simulator_;
+
+  // TODO: Enable CodeSimulatorArm64 for more host ISAs once vixl::Simulator supports them.
+  static constexpr bool kCanSimulate = (kRuntimeISA == kX86_64);
+
+  DISALLOW_COPY_AND_ASSIGN(CodeSimulatorArm64);
+};
+
+}  // namespace arm64
+}  // namespace art
+
+#endif  // ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_ARM64_H_
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 63e6326..13e3774 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -164,14 +164,20 @@
 
 class DeoptimizationContextRecord {
  public:
-  DeoptimizationContextRecord(const JValue& ret_val, bool is_reference,
+  DeoptimizationContextRecord(const JValue& ret_val,
+                              bool is_reference,
+                              bool from_code,
                               mirror::Throwable* pending_exception,
                               DeoptimizationContextRecord* link)
-      : ret_val_(ret_val), is_reference_(is_reference), pending_exception_(pending_exception),
+      : ret_val_(ret_val),
+        is_reference_(is_reference),
+        from_code_(from_code),
+        pending_exception_(pending_exception),
         link_(link) {}
 
   JValue GetReturnValue() const { return ret_val_; }
   bool IsReference() const { return is_reference_; }
+  bool GetFromCode() const { return from_code_; }
   mirror::Throwable* GetPendingException() const { return pending_exception_; }
   DeoptimizationContextRecord* GetLink() const { return link_; }
   mirror::Object** GetReturnValueAsGCRoot() {
@@ -189,6 +195,9 @@
   // Indicates whether the returned value is a reference. If so, the GC will visit it.
   const bool is_reference_;
 
+  // Whether the context was created from an explicit deoptimization in the code.
+  const bool from_code_;
+
   // The exception that was pending before deoptimization (or null if there was no pending
   // exception).
   mirror::Throwable* pending_exception_;
@@ -220,22 +229,28 @@
   DISALLOW_COPY_AND_ASSIGN(StackedShadowFrameRecord);
 };
 
-void Thread::PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+void Thread::PushDeoptimizationContext(const JValue& return_value,
+                                       bool is_reference,
+                                       bool from_code,
                                        mirror::Throwable* exception) {
   DeoptimizationContextRecord* record = new DeoptimizationContextRecord(
       return_value,
       is_reference,
+      from_code,
       exception,
       tlsPtr_.deoptimization_context_stack);
   tlsPtr_.deoptimization_context_stack = record;
 }
 
-void Thread::PopDeoptimizationContext(JValue* result, mirror::Throwable** exception) {
+void Thread::PopDeoptimizationContext(JValue* result,
+                                      mirror::Throwable** exception,
+                                      bool* from_code) {
   AssertHasDeoptimizationContext();
   DeoptimizationContextRecord* record = tlsPtr_.deoptimization_context_stack;
   tlsPtr_.deoptimization_context_stack = record->GetLink();
   result->SetJ(record->GetReturnValue().GetJ());
   *exception = record->GetPendingException();
+  *from_code = record->GetFromCode();
   delete record;
 }
 
@@ -2463,6 +2478,23 @@
   QUICK_ENTRY_POINT_INFO(pCmpgFloat)
   QUICK_ENTRY_POINT_INFO(pCmplDouble)
   QUICK_ENTRY_POINT_INFO(pCmplFloat)
+  QUICK_ENTRY_POINT_INFO(pCos)
+  QUICK_ENTRY_POINT_INFO(pSin)
+  QUICK_ENTRY_POINT_INFO(pAcos)
+  QUICK_ENTRY_POINT_INFO(pAsin)
+  QUICK_ENTRY_POINT_INFO(pAtan)
+  QUICK_ENTRY_POINT_INFO(pAtan2)
+  QUICK_ENTRY_POINT_INFO(pCbrt)
+  QUICK_ENTRY_POINT_INFO(pCosh)
+  QUICK_ENTRY_POINT_INFO(pExp)
+  QUICK_ENTRY_POINT_INFO(pExpm1)
+  QUICK_ENTRY_POINT_INFO(pHypot)
+  QUICK_ENTRY_POINT_INFO(pLog)
+  QUICK_ENTRY_POINT_INFO(pLog10)
+  QUICK_ENTRY_POINT_INFO(pNextAfter)
+  QUICK_ENTRY_POINT_INFO(pSinh)
+  QUICK_ENTRY_POINT_INFO(pTan)
+  QUICK_ENTRY_POINT_INFO(pTanh)
   QUICK_ENTRY_POINT_INFO(pFmod)
   QUICK_ENTRY_POINT_INFO(pL2d)
   QUICK_ENTRY_POINT_INFO(pFmodf)
@@ -2516,6 +2548,7 @@
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuffer)
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuilder)
   QUICK_ENTRY_POINT_INFO(pReadBarrierJni)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMark)
   QUICK_ENTRY_POINT_INFO(pReadBarrierSlow)
   QUICK_ENTRY_POINT_INFO(pReadBarrierForRootSlow)
 #undef QUICK_ENTRY_POINT_INFO
@@ -2546,7 +2579,8 @@
     if (is_deoptimization) {
       // Save the exception into the deoptimization context so it can be restored
       // before entering the interpreter.
-      PushDeoptimizationContext(JValue(), false, exception);
+      PushDeoptimizationContext(
+          JValue(), /* is_reference */ false, /* from_code */ false, exception);
     }
   }
   // Don't leave exception visible while we try to find the handler, which may cause class
diff --git a/runtime/thread.h b/runtime/thread.h
index 4624f27..6cb895c 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -545,6 +545,13 @@
         OFFSETOF_MEMBER(tls_32bit_sized_values, state_and_flags));
   }
 
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> IsGcMarkingOffset() {
+    return ThreadOffset<pointer_size>(
+        OFFSETOF_MEMBER(Thread, tls32_) +
+        OFFSETOF_MEMBER(tls_32bit_sized_values, is_gc_marking));
+  }
+
  private:
   template<size_t pointer_size>
   static ThreadOffset<pointer_size> ThreadOffsetFromTlsPtr(size_t tls_ptr_offset) {
@@ -849,10 +856,14 @@
   // and execute Java code, so there might be nested deoptimizations happening.
   // We need to save the ongoing deoptimization shadow frames and return
   // values on stacks.
-  void PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+  // 'from_code' denotes whether the deoptimization was explicitly requested by
+  // compiled code.
+  void PushDeoptimizationContext(const JValue& return_value,
+                                 bool is_reference,
+                                 bool from_code,
                                  mirror::Throwable* exception)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void PopDeoptimizationContext(JValue* result, mirror::Throwable** exception)
+  void PopDeoptimizationContext(JValue* result, mirror::Throwable** exception, bool* from_code)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void AssertHasDeoptimizationContext()
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -1224,7 +1235,7 @@
     RuntimeStats stats;
   } tls64_;
 
-  struct PACKED(4) tls_ptr_sized_values {
+  struct PACKED(sizeof(void*)) tls_ptr_sized_values {
       tls_ptr_sized_values() : card_table(nullptr), exception(nullptr), stack_end(nullptr),
       managed_stack(), suspend_trigger(nullptr), jni_env(nullptr), tmp_jni_env(nullptr),
       self(nullptr), opeer(nullptr), jpeer(nullptr), stack_begin(nullptr), stack_size(0),
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index b09b87f..a390908 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -948,7 +948,12 @@
   Locks::mutator_lock_->ExclusiveLock(self);
   Locks::mutator_lock_->ExclusiveUnlock(self);
 #endif
-  AssertThreadsAreSuspended(self, self, debug_thread);
+  // Disabled for the following race condition:
+  // Thread 1 calls SuspendAllForDebugger, gets preempted after pulsing the mutator lock.
+  // Thread 2 calls SuspendAll and SetStateUnsafe (perhaps from Dbg::Disconnected).
+  // Thread 1 fails assertion that all threads are suspended due to thread 2 being in a runnable
+  // state (from SetStateUnsafe).
+  // AssertThreadsAreSuspended(self, self, debug_thread);
 
   VLOG(threads) << *self << " SuspendAllForDebugger complete";
 }
diff --git a/runtime/utf.cc b/runtime/utf.cc
index 10600e2..a2d6363 100644
--- a/runtime/utf.cc
+++ b/runtime/utf.cc
@@ -23,28 +23,50 @@
 
 namespace art {
 
+// This is used only from the debugger and test code.
 size_t CountModifiedUtf8Chars(const char* utf8) {
+  return CountModifiedUtf8Chars(utf8, strlen(utf8));
+}
+
+/*
+ * This does not validate UTF-8 rules (nor did the older code). But it gets the right answer
+ * for valid UTF-8 and that's fine because it's used only to size a buffer for later
+ * conversion.
+ *
+ * Modified UTF-8 encodes Unicode code points of up to 21 bits as byte sequences, as follows:
+ * U+0001  - U+007F   0xxxxxxx
+ * U+0080  - U+07FF   110xxxxx 10xxxxxx
+ * U+0800  - U+FFFF   1110xxxx 10xxxxxx 10xxxxxx
+ * U+10000 - U+1FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ *
+ * U+0000 is encoded using the 2nd form to avoid nulls inside strings (this differs from
+ * standard UTF-8).
+ * The four-byte encoding converts to two UTF-16 characters.
+ */
+size_t CountModifiedUtf8Chars(const char* utf8, size_t byte_count) {
+  DCHECK_LE(byte_count, strlen(utf8));
   size_t len = 0;
-  int ic;
-  while ((ic = *utf8++) != '\0') {
+  const char* end = utf8 + byte_count;
+  for (; utf8 < end; ++utf8) {
+    int ic = *utf8;
     len++;
-    if ((ic & 0x80) == 0) {
-      // one-byte encoding
+    if (LIKELY((ic & 0x80) == 0)) {
+      // One-byte encoding.
       continue;
     }
-    // two- or three-byte encoding
+    // Two- or three-byte encoding.
     utf8++;
     if ((ic & 0x20) == 0) {
-      // two-byte encoding
+      // Two-byte encoding.
       continue;
     }
     utf8++;
     if ((ic & 0x10) == 0) {
-      // three-byte encoding
+      // Three-byte encoding.
       continue;
     }
 
-    // four-byte encoding: needs to be converted into a surrogate
+    // Four-byte encoding: needs to be converted into a surrogate
     // pair.
     utf8++;
     len++;
@@ -52,6 +74,7 @@
   return len;
 }
 
+// This is used only from the debugger and test code.
 void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, const char* utf8_data_in) {
   while (*utf8_data_in != '\0') {
     const uint32_t ch = GetUtf16FromUtf8(&utf8_data_in);
@@ -65,13 +88,53 @@
   }
 }
 
-void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count) {
+void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, size_t out_chars,
+                                const char* utf8_data_in, size_t in_bytes) {
+  const char *in_start = utf8_data_in;
+  const char *in_end = utf8_data_in + in_bytes;
+  uint16_t *out_p = utf16_data_out;
+
+  if (LIKELY(out_chars == in_bytes)) {
+    // Common case where all characters are ASCII.
+    for (const char *p = in_start; p < in_end;) {
+      // Safe even if char is signed because ASCII characters always have
+      // the high bit cleared.
+      *out_p++ = dchecked_integral_cast<uint16_t>(*p++);
+    }
+    return;
+  }
+
+  // String contains non-ASCII characters.
+  for (const char *p = in_start; p < in_end;) {
+    const uint32_t ch = GetUtf16FromUtf8(&p);
+    const uint16_t leading = GetLeadingUtf16Char(ch);
+    const uint16_t trailing = GetTrailingUtf16Char(ch);
+
+    *out_p++ = leading;
+    if (trailing != 0) {
+      *out_p++ = trailing;
+    }
+  }
+}
+
+void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count,
+                                const uint16_t* utf16_in, size_t char_count) {
+  if (LIKELY(byte_count == char_count)) {
+    // Common case where all characters are ASCII.
+    const uint16_t *utf16_end = utf16_in + char_count;
+    for (const uint16_t *p = utf16_in; p < utf16_end;) {
+      *utf8_out++ = dchecked_integral_cast<char>(*p++);
+    }
+    return;
+  }
+
+  // String contains non-ASCII characters.
   while (char_count--) {
     const uint16_t ch = *utf16_in++;
     if (ch > 0 && ch <= 0x7f) {
       *utf8_out++ = ch;
     } else {
-      // char_count == 0 here implies we've encountered an unpaired
+      // A char_count of 0 here implies we've encountered an unpaired
       // surrogate and we have no choice but to encode it as 3-byte UTF
       // sequence. Note that unpaired surrogates can occur as a part of
       // "normal" operation.
@@ -115,8 +178,8 @@
   return static_cast<int32_t>(hash);
 }
 
-size_t ComputeModifiedUtf8Hash(const char* chars) {
-  size_t hash = 0;
+uint32_t ComputeModifiedUtf8Hash(const char* chars) {
+  uint32_t hash = 0;
   while (*chars != '\0') {
     hash = hash * 31 + *chars++;
   }
@@ -161,34 +224,31 @@
 
 size_t CountUtf8Bytes(const uint16_t* chars, size_t char_count) {
   size_t result = 0;
-  while (char_count--) {
+  const uint16_t *end = chars + char_count;
+  while (chars < end) {
     const uint16_t ch = *chars++;
-    if (ch > 0 && ch <= 0x7f) {
-      ++result;
-    } else if (ch >= 0xd800 && ch <= 0xdbff) {
-      if (char_count > 0) {
+    if (LIKELY(ch != 0 && ch < 0x80)) {
+      result++;
+      continue;
+    }
+    if (ch < 0x800) {
+      result += 2;
+      continue;
+    }
+    if (ch >= 0xd800 && ch < 0xdc00) {
+      if (chars < end) {
         const uint16_t ch2 = *chars;
         // If we find a properly paired surrogate, we emit it as a 4 byte
         // UTF sequence. If we find an unpaired leading or trailing surrogate,
         // we emit it as a 3 byte sequence like would have done earlier.
-        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+        if (ch2 >= 0xdc00 && ch2 < 0xe000) {
           chars++;
-          char_count--;
-
           result += 4;
-        } else {
-          result += 3;
+          continue;
         }
-      } else {
-        // This implies we found an unpaired trailing surrogate at the end
-        // of a string.
-        result += 3;
       }
-    } else if (ch > 0x7ff) {
-      result += 3;
-    } else {
-      result += 2;
     }
+    result += 3;
   }
   return result;
 }
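
Two worked examples against the rewritten helpers above. Byte counting: in { 'h', 0xd801, 0xdc01 }, the ASCII char costs 1 byte and the properly paired surrogate 4, so CountUtf8Bytes returns 5; a lone trailing 0xd801 instead costs 3. The hash keeps the multiply-by-31 recurrence at a fixed uint32_t width: for "ab", hash = 0 * 31 + 'a' = 97, then 97 * 31 + 'b' = 3105. In test form (hypothetical assertions in the style of utf_test.cc):

    const uint16_t paired[] = { 'h', 0xd801, 0xdc01 };
    EXPECT_EQ(5u, CountUtf8Bytes(paired, 3));    // 1 (ASCII) + 4 (surrogate pair).

    const uint16_t unpaired[] = { 'h', 0xd801 };
    EXPECT_EQ(4u, CountUtf8Bytes(unpaired, 2));  // 1 + 3 (lone surrogate, 3-byte form).

    EXPECT_EQ(3105u, ComputeModifiedUtf8Hash("ab"));  // ('a' * 31) + 'b' = 97 * 31 + 98.
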
diff --git a/runtime/utf.h b/runtime/utf.h
index 1193d29..4abd605 100644
--- a/runtime/utf.h
+++ b/runtime/utf.h
@@ -40,6 +40,7 @@
  * Returns the number of UTF-16 characters in the given modified UTF-8 string.
  */
 size_t CountModifiedUtf8Chars(const char* utf8);
+size_t CountModifiedUtf8Chars(const char* utf8, size_t byte_count);
 
 /*
  * Returns the number of modified UTF-8 bytes needed to represent the given
@@ -51,6 +52,8 @@
  * Convert from Modified UTF-8 to UTF-16.
  */
 void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_out, const char* utf8_in);
+void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_out, size_t out_chars,
+                                const char* utf8_in, size_t in_bytes);
 
 /*
  * Compare two modified UTF-8 strings as UTF-16 code point values in a non-locale sensitive manner
@@ -71,7 +74,8 @@
  * this anyway, so if you want a NUL-terminated string, you know where to
  * put the NUL byte.
  */
-void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count);
+void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count,
+                                const uint16_t* utf16_in, size_t char_count);
 
 /*
  * The java.lang.String hashCode() algorithm.
@@ -81,8 +85,8 @@
 int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count);
 
 // Compute a hash code of a modified UTF-8 string. Not the standard java hash since it returns a
-// size_t and hashes individual chars instead of codepoint words.
-size_t ComputeModifiedUtf8Hash(const char* chars);
+// uint32_t and hashes individual chars instead of codepoint words.
+uint32_t ComputeModifiedUtf8Hash(const char* chars);
 
 /*
  * Retrieve the next UTF-16 character or surrogate pair from a UTF-8 string.
diff --git a/runtime/utf_test.cc b/runtime/utf_test.cc
index 94a6ea5..5239e40 100644
--- a/runtime/utf_test.cc
+++ b/runtime/utf_test.cc
@@ -19,6 +19,7 @@
 #include "common_runtime_test.h"
 #include "utf-inl.h"
 
+#include <map>
 #include <vector>
 
 namespace art {
@@ -48,7 +49,7 @@
 };
 
 // A test string that contains a UTF-8 encoding of a surrogate pair
-// (code point = U+10400)
+// (code point = U+10400).
 static const uint8_t kSurrogateEncoding[] = {
     0xed, 0xa0, 0x81,
     0xed, 0xb0, 0x80,
@@ -66,13 +67,13 @@
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
   EXPECT_ARRAY_POSITION(1, ptr, start);
 
-  // Two byte sequence
+  // Two byte sequence.
   pair = GetUtf16FromUtf8(&ptr);
   EXPECT_EQ(0xa2, GetLeadingUtf16Char(pair));
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
   EXPECT_ARRAY_POSITION(3, ptr, start);
 
-  // Three byte sequence
+  // Three byte sequence.
   pair = GetUtf16FromUtf8(&ptr);
   EXPECT_EQ(0x20ac, GetLeadingUtf16Char(pair));
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
@@ -84,7 +85,7 @@
   EXPECT_EQ(0xdfe0, GetTrailingUtf16Char(pair));
   EXPECT_ARRAY_POSITION(10, ptr, start);
 
-  // Null terminator
+  // Null terminator.
   pair = GetUtf16FromUtf8(&ptr);
   EXPECT_EQ(0, GetLeadingUtf16Char(pair));
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
@@ -117,7 +118,8 @@
   ASSERT_EQ(expected.size(), CountUtf8Bytes(&input[0], input.size()));
 
   std::vector<uint8_t> output(expected.size());
-  ConvertUtf16ToModifiedUtf8(reinterpret_cast<char*>(&output[0]), &input[0], input.size());
+  ConvertUtf16ToModifiedUtf8(reinterpret_cast<char*>(&output[0]), expected.size(),
+                             &input[0], input.size());
   EXPECT_EQ(expected, output);
 }
 
@@ -139,10 +141,10 @@
   AssertConversion({ 'h', 'e', 'l', 'l', 'o' }, { 0x68, 0x65, 0x6c, 0x6c, 0x6f });
 
   AssertConversion({
-      0xd802, 0xdc02,  // Surrogate pair
-      0xdef0, 0xdcff,  // Three byte encodings
-      0x0101, 0x0000,  // Two byte encodings
-      'p'   , 'p'      // One byte encoding
+      0xd802, 0xdc02,  // Surrogate pair.
+      0xdef0, 0xdcff,  // Three byte encodings.
+      0x0101, 0x0000,  // Two byte encodings.
+      'p'   , 'p'      // One byte encoding.
     }, {
       0xf0, 0x90, 0xa0, 0x82,
       0xed, 0xbb, 0xb0, 0xed, 0xb3, 0xbf,
@@ -155,9 +157,225 @@
   // Unpaired trailing surrogate at the end of input.
   AssertConversion({ 'h', 'e', 0xd801 }, { 'h', 'e', 0xed, 0xa0, 0x81 });
   // Unpaired (or incorrectly paired) surrogates in the middle of the input.
-  AssertConversion({ 'h', 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 'e' });
-  AssertConversion({ 'h', 0xd801, 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 0xed, 0xa0, 0x81, 'e' });
-  AssertConversion({ 'h', 0xdc00, 0xdc00, 'e' }, { 'h', 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 'e' });
+  const std::map<std::vector<uint16_t>, std::vector<uint8_t>> prefixes {
+      {{ 'h' }, { 'h' }},
+      {{ 0 }, { 0xc0, 0x80 }},
+      {{ 0x81 }, { 0xc2, 0x81 }},
+      {{ 0x801 }, { 0xe0, 0xa0, 0x81 }},
+  };
+  const std::map<std::vector<uint16_t>, std::vector<uint8_t>> suffixes {
+      {{ 'e' }, { 'e' }},
+      {{ 0 }, { 0xc0, 0x80 }},
+      {{ 0x7ff }, { 0xdf, 0xbf }},
+      {{ 0xffff }, { 0xef, 0xbf, 0xbf }},
+  };
+  const std::map<std::vector<uint16_t>, std::vector<uint8_t>> tests {
+      {{ 0xd801 }, { 0xed, 0xa0, 0x81 }},
+      {{ 0xdc00 }, { 0xed, 0xb0, 0x80 }},
+      {{ 0xd801, 0xd801 }, { 0xed, 0xa0, 0x81, 0xed, 0xa0, 0x81 }},
+      {{ 0xdc00, 0xdc00 }, { 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80 }},
+  };
+  for (const auto& prefix : prefixes) {
+    const std::vector<uint16_t>& prefix_in = prefix.first;
+    const std::vector<uint8_t>& prefix_out = prefix.second;
+    for (const auto& test : tests) {
+      const std::vector<uint16_t>& test_in = test.first;
+      const std::vector<uint8_t>& test_out = test.second;
+      for (const auto& suffix : suffixes) {
+        const std::vector<uint16_t>& suffix_in = suffix.first;
+        const std::vector<uint8_t>& suffix_out = suffix.second;
+        std::vector<uint16_t> in = prefix_in;
+        in.insert(in.end(), test_in.begin(), test_in.end());
+        in.insert(in.end(), suffix_in.begin(), suffix_in.end());
+        std::vector<uint8_t> out = prefix_out;
+        out.insert(out.end(), test_out.begin(), test_out.end());
+        out.insert(out.end(), suffix_out.begin(), suffix_out.end());
+        AssertConversion(in, out);
+      }
+    }
+  }
+}
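For orientation, here is one case the prefix/test/suffix loops above generate, hand-expanded from the three maps (NUL encodes as c0 80 in modified UTF-8):

    // prefix { 0 } + test { 0xd801 } + suffix { 0xffff }:
    AssertConversion({ 0x0000, 0xd801, 0xffff },
                     { 0xc0, 0x80, 0xed, 0xa0, 0x81, 0xef, 0xbf, 0xbf });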
+
+// Old versions of the functions, kept here to compare answers with the optimized versions.
+
+size_t CountModifiedUtf8Chars_reference(const char* utf8) {
+  size_t len = 0;
+  int ic;
+  while ((ic = *utf8++) != '\0') {
+    len++;
+    if ((ic & 0x80) == 0) {
+      // one-byte encoding
+      continue;
+    }
+    // two- or three-byte encoding
+    utf8++;
+    if ((ic & 0x20) == 0) {
+      // two-byte encoding
+      continue;
+    }
+    utf8++;
+    if ((ic & 0x10) == 0) {
+      // three-byte encoding
+      continue;
+    }
+
+    // four-byte encoding: needs to be converted into a surrogate
+    // pair.
+    utf8++;
+    len++;
+  }
+  return len;
+}
+
+static size_t CountUtf8Bytes_reference(const uint16_t* chars, size_t char_count) {
+  size_t result = 0;
+  while (char_count--) {
+    const uint16_t ch = *chars++;
+    if (ch > 0 && ch <= 0x7f) {
+      ++result;
+    } else if (ch >= 0xd800 && ch <= 0xdbff) {
+      if (char_count > 0) {
+        const uint16_t ch2 = *chars;
+        // If we find a properly paired surrogate, we emit it as a 4 byte
+        // UTF sequence. If we find an unpaired leading or trailing surrogate,
+        // we emit it as a 3 byte sequence, as we would have done earlier.
+        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+          chars++;
+          char_count--;
+
+          result += 4;
+        } else {
+          result += 3;
+        }
+      } else {
+        // This implies we found an unpaired trailing surrogate at the end
+        // of a string.
+        result += 3;
+      }
+    } else if (ch > 0x7ff) {
+      result += 3;
+    } else {
+      result += 2;
+    }
+  }
+  return result;
+}
+
+static void ConvertUtf16ToModifiedUtf8_reference(char* utf8_out, const uint16_t* utf16_in,
+                                                 size_t char_count) {
+  while (char_count--) {
+    const uint16_t ch = *utf16_in++;
+    if (ch > 0 && ch <= 0x7f) {
+      *utf8_out++ = ch;
+    } else {
+      // char_count == 0 here implies we've encountered an unpaired
+      // surrogate and have no choice but to encode it as a 3-byte UTF
+      // sequence. Note that unpaired surrogates can occur as part of
+      // "normal" operation.
+      if ((ch >= 0xd800 && ch <= 0xdbff) && (char_count > 0)) {
+        const uint16_t ch2 = *utf16_in;
+
+        // Check if the other half of the pair is within the expected
+        // range. If it isn't, we will have to emit both "halves" as
+        // separate 3 byte sequences.
+        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+          utf16_in++;
+          char_count--;
+          const uint32_t code_point = (ch << 10) + ch2 - 0x035fdc00;
+          *utf8_out++ = (code_point >> 18) | 0xf0;
+          *utf8_out++ = ((code_point >> 12) & 0x3f) | 0x80;
+          *utf8_out++ = ((code_point >> 6) & 0x3f) | 0x80;
+          *utf8_out++ = (code_point & 0x3f) | 0x80;
+          continue;
+        }
+      }
+
+      if (ch > 0x07ff) {
+        // Three byte encoding.
+        *utf8_out++ = (ch >> 12) | 0xe0;
+        *utf8_out++ = ((ch >> 6) & 0x3f) | 0x80;
+        *utf8_out++ = (ch & 0x3f) | 0x80;
+      } else /*(ch > 0x7f || ch == 0)*/ {
+        // Two byte encoding.
+        *utf8_out++ = (ch >> 6) | 0xc0;
+        *utf8_out++ = (ch & 0x3f) | 0x80;
+      }
+    }
+  }
+}
+
+// Exhaustive test of converting a single code point to UTF-16, then UTF-8, and back again.
+
+static void codePointToSurrogatePair(uint32_t code_point, uint16_t &first, uint16_t &second) {
+  first = (code_point >> 10) + 0xd7c0;
+  second = (code_point & 0x03ff) + 0xdc00;
+}
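A quick spot-check of the arithmetic above, using only the standard Unicode surrogate formulas (0xd7c0 is 0xd800 - (0x10000 >> 10)); U+10400 is the same code point as kSurrogateEncoding earlier in this file:

    constexpr uint32_t kCodePoint = 0x10400;
    constexpr uint16_t kLead = (kCodePoint >> 10) + 0xd7c0;      // 0xd801
    constexpr uint16_t kTrail = (kCodePoint & 0x03ff) + 0xdc00;  // 0xdc00
    static_assert(kLead == 0xd801 && kTrail == 0xdc00, "surrogate pair for U+10400");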
+
+static void testConversions(uint16_t *buf, int char_count) {
+  char bytes_test[8], bytes_reference[8];
+  uint16_t out_buf_test[4], out_buf_reference[4];
+  int byte_count_test, byte_count_reference;
+  int char_count_test, char_count_reference;
+
+  // Calculate the number of utf-8 bytes for the utf-16 chars.
+  byte_count_reference = CountUtf8Bytes_reference(buf, char_count);
+  byte_count_test = CountUtf8Bytes(buf, char_count);
+  EXPECT_EQ(byte_count_reference, byte_count_test);
+
+  // Convert the utf-16 string to utf-8 bytes.
+  ConvertUtf16ToModifiedUtf8_reference(bytes_reference, buf, char_count);
+  ConvertUtf16ToModifiedUtf8(bytes_test, byte_count_test, buf, char_count);
+  for (int i = 0; i < byte_count_test; ++i) {
+    EXPECT_EQ(bytes_reference[i], bytes_test[i]);
+  }
+
+  // Calculate the number of utf-16 chars from the utf-8 bytes.
+  bytes_reference[byte_count_reference] = 0;  // Reference function needs null termination.
+  char_count_reference = CountModifiedUtf8Chars_reference(bytes_reference);
+  char_count_test = CountModifiedUtf8Chars(bytes_test, byte_count_test);
+  EXPECT_EQ(char_count, char_count_reference);
+  EXPECT_EQ(char_count, char_count_test);
+
+  // Convert the utf-8 bytes back to utf-16 chars.
+  // Does not need a copied _reference version of the function because the original
+  // function with the old API is retained for debug/testing code.
+  ConvertModifiedUtf8ToUtf16(out_buf_reference, bytes_reference);
+  ConvertModifiedUtf8ToUtf16(out_buf_test, char_count_test, bytes_test, byte_count_test);
+  for (int i = 0; i < char_count_test; ++i) {
+    EXPECT_EQ(buf[i], out_buf_reference[i]);
+    EXPECT_EQ(buf[i], out_buf_test[i]);
+  }
+}
+
+TEST_F(UtfTest, ExhaustiveBidirectionalCodePointCheck) {
+  for (int codePoint = 0; codePoint <= 0x10ffff; ++codePoint) {
+    uint16_t buf[4];
+    if (codePoint <= 0xffff) {
+      if (codePoint >= 0xd800 && codePoint <= 0xdfff) {
+        // According to the Unicode standard, no character will ever
+        // be assigned to these code points, and they cannot be encoded
+        // into either utf-16 or utf-8.
+        continue;
+      }
+      buf[0] = 'h';
+      buf[1] = codePoint;
+      buf[2] = 'e';
+      testConversions(buf, 2);
+      testConversions(buf, 3);
+      testConversions(buf + 1, 1);
+      testConversions(buf + 1, 2);
+    } else {
+      buf[0] = 'h';
+      codePointToSurrogatePair(codePoint, buf[1], buf[2]);
+      buf[3] = 'e';
+      testConversions(buf, 2);
+      testConversions(buf, 3);
+      testConversions(buf, 4);
+      testConversions(buf + 1, 1);
+      testConversions(buf + 1, 2);
+      testConversions(buf + 1, 3);
+    }
+  }
 }
 
 }  // namespace art
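As a sanity check on the exhaustive loop's coverage (plain Unicode arithmetic, not an ART API): every Unicode scalar value is round-tripped, and only the 2048 surrogate code points are skipped.

    constexpr int kAllCodePoints = 0x110000;      // U+0000 .. U+10FFFF
    constexpr int kSurrogates = 0xe000 - 0xd800;  // the 2048 skipped values
    static_assert(kAllCodePoints - kSurrogates == 1112064, "Unicode scalar values");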
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 68db7e3..eddc3a4 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1392,21 +1392,6 @@
   return filename;
 }
 
-bool IsZipMagic(uint32_t magic) {
-  return (('P' == ((magic >> 0) & 0xff)) &&
-          ('K' == ((magic >> 8) & 0xff)));
-}
-
-bool IsDexMagic(uint32_t magic) {
-  return DexFile::IsMagicValid(reinterpret_cast<const uint8_t*>(&magic));
-}
-
-bool IsOatMagic(uint32_t magic) {
-  return (memcmp(reinterpret_cast<const uint8_t*>(magic),
-                 OatHeader::kOatMagic,
-                 sizeof(OatHeader::kOatMagic)) == 0);
-}
-
 bool Exec(std::vector<std::string>& arg_vector, std::string* error_msg) {
   const std::string command_line(Join(arg_vector, ' '));
 
diff --git a/runtime/utils.h b/runtime/utils.h
index 3690f86..5b9e963 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -18,9 +18,11 @@
 #define ART_RUNTIME_UTILS_H_
 
 #include <pthread.h>
+#include <stdlib.h>
 
 #include <limits>
 #include <memory>
+#include <random>
 #include <string>
 #include <type_traits>
 #include <vector>
@@ -271,11 +273,6 @@
 // Returns the system location for an image
 std::string GetSystemImageFilename(const char* location, InstructionSet isa);
 
-// Check whether the given magic matches a known file type.
-bool IsZipMagic(uint32_t magic);
-bool IsDexMagic(uint32_t magic);
-bool IsOatMagic(uint32_t magic);
-
 // Wrapper on fork/execv to run a command in a subprocess.
 bool Exec(std::vector<std::string>& arg_vector, std::string* error_msg);
 
@@ -350,6 +347,26 @@
                  double* parsed_value,
                  UsageFn Usage);
 
+#if defined(__BIONIC__)
+struct Arc4RandomGenerator {
+  typedef uint32_t result_type;
+  static constexpr uint32_t min() { return std::numeric_limits<uint32_t>::min(); }
+  static constexpr uint32_t max() { return std::numeric_limits<uint32_t>::max(); }
+  uint32_t operator() () { return arc4random(); }
+};
+using RNG = Arc4RandomGenerator;
+#else
+using RNG = std::random_device;
+#endif
+
+template <typename T>
+T GetRandomNumber(T min, T max) {
+  CHECK_LT(min, max);
+  std::uniform_int_distribution<T> dist(min, max);
+  RNG rng;
+  return dist(rng);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_UTILS_H_
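Arc4RandomGenerator satisfies the UniformRandomBitGenerator requirements (result_type, min(), max(), operator()), which is what lets std::uniform_int_distribution consume it. A hedged call-site sketch follows; GetRandomNumber is from the header above, but this caller is hypothetical. Note that the distribution is inclusive on both ends and CHECK_LT requires min to be strictly less than max:

    #include "utils.h"  // assumed include path

    size_t PickRandomIndex(size_t table_size) {
      // Uniform over [0, table_size - 1]; table_size must be >= 2 to satisfy CHECK_LT.
      return art::GetRandomNumber<size_t>(0u, table_size - 1);
    }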
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 364b8ce..d75587b 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -116,6 +116,7 @@
 
 MethodVerifier::FailureKind MethodVerifier::VerifyClass(Thread* self,
                                                         mirror::Class* klass,
+                                                        CompilerCallbacks* callbacks,
                                                         bool allow_soft_failures,
                                                         bool log_hard_failures,
                                                         std::string* error) {
@@ -140,9 +141,9 @@
   }
   if (early_failure) {
     *error = "Verifier rejected class " + PrettyDescriptor(klass) + failure_message;
-    if (Runtime::Current()->IsAotCompiler()) {
+    if (callbacks != nullptr) {
       ClassReference ref(&dex_file, klass->GetDexClassDefIndex());
-      Runtime::Current()->GetCompilerCallbacks()->ClassRejected(ref);
+      callbacks->ClassRejected(ref);
     }
     return kHardFailure;
   }
@@ -154,6 +155,7 @@
                      dex_cache,
                      class_loader,
                      class_def,
+                     callbacks,
                      allow_soft_failures,
                      log_hard_failures,
                      error);
@@ -172,6 +174,7 @@
                                    ClassDataItemIterator* it,
                                    Handle<mirror::DexCache> dex_cache,
                                    Handle<mirror::ClassLoader> class_loader,
+                                   CompilerCallbacks* callbacks,
                                    bool allow_soft_failures,
                                    bool log_hard_failures,
                                    bool need_precise_constants,
@@ -192,7 +195,7 @@
     }
     previous_method_idx = method_idx;
     InvokeType type = it->GetMethodInvokeType(*class_def);
-    ArtMethod* method = linker->ResolveMethod(
+    ArtMethod* method = linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
         *dex_file, method_idx, dex_cache, class_loader, nullptr, type);
     if (method == nullptr) {
       DCHECK(self->IsExceptionPending());
@@ -212,6 +215,7 @@
                                                       it->GetMethodCodeItem(),
                                                       method,
                                                       it->GetMethodAccessFlags(),
+                                                      callbacks,
                                                       allow_soft_failures,
                                                       log_hard_failures,
                                                       need_precise_constants,
@@ -241,6 +245,7 @@
                                                         Handle<mirror::DexCache> dex_cache,
                                                         Handle<mirror::ClassLoader> class_loader,
                                                         const DexFile::ClassDef* class_def,
+                                                        CompilerCallbacks* callbacks,
                                                         bool allow_soft_failures,
                                                         bool log_hard_failures,
                                                         std::string* error) {
@@ -274,6 +279,7 @@
                       &it,
                       dex_cache,
                       class_loader,
+                      callbacks,
                       allow_soft_failures,
                       log_hard_failures,
                       false /* need precise constants */,
@@ -288,6 +294,7 @@
                       &it,
                       dex_cache,
                       class_loader,
+                      callbacks,
                       allow_soft_failures,
                       log_hard_failures,
                       false /* need precise constants */,
@@ -322,6 +329,7 @@
                                                          const DexFile::CodeItem* code_item,
                                                          ArtMethod* method,
                                                          uint32_t method_access_flags,
+                                                         CompilerCallbacks* callbacks,
                                                          bool allow_soft_failures,
                                                          bool log_hard_failures,
                                                          bool need_precise_constants,
@@ -336,6 +344,12 @@
     // Verification completed, however failures may be pending that didn't cause the verification
     // to hard fail.
     CHECK(!verifier.have_pending_hard_failure_);
+
+    if (code_item != nullptr && callbacks != nullptr) {
+      // Let the interested party know that the method was verified.
+      callbacks->MethodVerified(&verifier);
+    }
+
     if (verifier.failures_.size() != 0) {
       if (VLOG_IS_ON(verifier)) {
         verifier.DumpFailures(VLOG_STREAM(verifier) << "Soft verification failures in "
@@ -363,8 +377,14 @@
             verifier.failure_messages_[verifier.failure_messages_.size() - 1]->str();
       }
       result = kHardFailure;
+
+      if (callbacks != nullptr) {
+        // Let the interested party know that we failed the class.
+        ClassReference ref(dex_file, dex_file->GetIndexForClassDef(*class_def));
+        callbacks->ClassRejected(ref);
+      }
     }
-    if (kDebugVerify) {
+    if (VLOG_IS_ON(verifier)) {
       std::cout << "\n" << verifier.info_messages_.str();
       verifier.Dump(std::cout);
     }
@@ -408,13 +428,18 @@
 }
 
 MethodVerifier::MethodVerifier(Thread* self,
-                               const DexFile* dex_file, Handle<mirror::DexCache> dex_cache,
+                               const DexFile* dex_file,
+                               Handle<mirror::DexCache> dex_cache,
                                Handle<mirror::ClassLoader> class_loader,
                                const DexFile::ClassDef* class_def,
-                               const DexFile::CodeItem* code_item, uint32_t dex_method_idx,
-                               ArtMethod* method, uint32_t method_access_flags,
-                               bool can_load_classes, bool allow_soft_failures,
-                               bool need_precise_constants, bool verify_to_dump,
+                               const DexFile::CodeItem* code_item,
+                               uint32_t dex_method_idx,
+                               ArtMethod* method,
+                               uint32_t method_access_flags,
+                               bool can_load_classes,
+                               bool allow_soft_failures,
+                               bool need_precise_constants,
+                               bool verify_to_dump,
                                bool allow_thread_suspension)
     : self_(self),
       arena_stack_(Runtime::Current()->GetArenaPool()),
@@ -739,10 +764,7 @@
   result = result && VerifyInstructions();
   // Perform code-flow analysis and return.
   result = result && VerifyCodeFlow();
-  // Compute information for compiler.
-  if (result && runtime->IsCompiler()) {
-    result = runtime->GetCompilerCallbacks()->MethodVerified(this);
-  }
+
   return result;
 }
 
@@ -802,10 +824,6 @@
       // Hard verification failures at compile time will still fail at runtime, so the class is
       // marked as rejected to prevent it from being compiled.
     case VERIFY_ERROR_BAD_CLASS_HARD: {
-      if (Runtime::Current()->IsAotCompiler()) {
-        ClassReference ref(dex_file_, dex_file_->GetIndexForClassDef(*class_def_));
-        Runtime::Current()->GetCompilerCallbacks()->ClassRejected(ref);
-      }
       have_pending_hard_failure_ = true;
       if (VLOG_IS_ON(verifier) && kDumpRegLinesOnHardFailureIfVLOG) {
         ScopedObjectAccess soa(Thread::Current());
@@ -3638,7 +3656,9 @@
   const RegType& referrer = GetDeclaringClass();
   auto* cl = Runtime::Current()->GetClassLinker();
   auto pointer_size = cl->GetImagePointerSize();
+
   ArtMethod* res_method = dex_cache_->GetResolvedMethod(dex_method_idx, pointer_size);
+  bool stash_method = false;
   if (res_method == nullptr) {
     const char* name = dex_file_->GetMethodName(method_id);
     const Signature signature = dex_file_->GetMethodSignature(method_id);
@@ -3651,7 +3671,7 @@
       res_method = klass->FindVirtualMethod(name, signature, pointer_size);
     }
     if (res_method != nullptr) {
-      dex_cache_->SetResolvedMethod(dex_method_idx, res_method, pointer_size);
+      stash_method = true;
     } else {
       // If a virtual or interface method wasn't found with the expected type, look in
       // the direct methods. This can happen when the wrong invoke type is used or when
@@ -3680,6 +3700,38 @@
                                       << PrettyMethod(res_method);
     return nullptr;
   }
+
+  // Check that interface methods are static or match interface classes.
+  // We only allow statics if we have default methods enabled.
+  //
+  // Note: this check must come after the initializer check, as initializer failures are required
+  //       to fail the class, while this check implies an IncompatibleClassChangeError.
+  if (klass->IsInterface()) {
+    Runtime* runtime = Runtime::Current();
+    const bool default_methods_supported =
+        runtime == nullptr ||
+        runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods);
+    if (method_type != METHOD_INTERFACE &&
+        (!default_methods_supported || method_type != METHOD_STATIC)) {
+      Fail(VERIFY_ERROR_CLASS_CHANGE)
+          << "non-interface method " << PrettyMethod(dex_method_idx, *dex_file_)
+          << " is in an interface class " << PrettyClass(klass);
+      return nullptr;
+    }
+  } else {
+    if (method_type == METHOD_INTERFACE) {
+      Fail(VERIFY_ERROR_CLASS_CHANGE)
+          << "interface method " << PrettyMethod(dex_method_idx, *dex_file_)
+          << " is in a non-interface class " << PrettyClass(klass);
+      return nullptr;
+    }
+  }
+
+  // Only stash after the above checks passed; otherwise the method is not guaranteed to be correct.
+  if (stash_method) {
+    dex_cache_->SetResolvedMethod(dex_method_idx, res_method, pointer_size);
+  }
+
   // Check if access is allowed.
   if (!referrer.CanAccessMember(res_method->GetDeclaringClass(), res_method->GetAccessFlags())) {
     Fail(VERIFY_ERROR_ACCESS_METHOD) << "illegal method access (call " << PrettyMethod(res_method)
@@ -3692,23 +3744,6 @@
                                       << PrettyMethod(res_method);
     return nullptr;
   }
-  // Check that interface methods are static or match interface classes.
-  // We only allow statics if we don't have default methods enabled.
-  Runtime* runtime = Runtime::Current();
-  const bool default_methods_supported =
-      runtime == nullptr ||
-      runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods);
-  if (klass->IsInterface() &&
-      method_type != METHOD_INTERFACE &&
-      (!default_methods_supported || method_type != METHOD_STATIC)) {
-    Fail(VERIFY_ERROR_CLASS_CHANGE) << "non-interface method " << PrettyMethod(res_method)
-                                    << " is in an interface class " << PrettyClass(klass);
-    return nullptr;
-  } else if (!klass->IsInterface() && method_type == METHOD_INTERFACE) {
-    Fail(VERIFY_ERROR_CLASS_CHANGE) << "interface method " << PrettyMethod(res_method)
-                                    << " is in a non-interface class " << PrettyClass(klass);
-    return nullptr;
-  }
   // See if the method type implied by the invoke instruction matches the access flags for the
   // target method.
   if ((method_type == METHOD_DIRECT && (!res_method->IsDirect() || res_method->IsStatic())) ||
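The reordering above means a resolution result is written to the dex cache only after the interface/class-change checks pass, so a bad entry can never short-circuit those checks on a later cache hit. A self-contained sketch of that discipline; every name here is illustrative, not an ART API:

    #include <string>
    #include <unordered_map>

    struct Method { bool is_interface; };

    Method* Resolve(std::unordered_map<std::string, Method*>& cache,
                    const std::string& name,
                    Method* slow_path_result,  // what the uncached lookup found
                    bool expect_interface) {
      auto it = cache.find(name);
      Method* method = (it != cache.end()) ? it->second : slow_path_result;
      const bool stash = (it == cache.end()) && method != nullptr;
      if (method == nullptr || method->is_interface != expect_interface) {
        return nullptr;  // validation failed: never poison the cache
      }
      if (stash) {
        cache.emplace(name, method);  // stash only after the checks have passed
      }
      return method;
    }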
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 719f0d7..79db576 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -33,6 +33,7 @@
 
 namespace art {
 
+class CompilerCallbacks;
 class Instruction;
 struct ReferenceMap2Visitor;
 class Thread;
@@ -141,6 +142,7 @@
   /* Verify a class. Returns "kNoFailure" on success. */
   static FailureKind VerifyClass(Thread* self,
                                  mirror::Class* klass,
+                                 CompilerCallbacks* callbacks,
                                  bool allow_soft_failures,
                                  bool log_hard_failures,
                                  std::string* error)
@@ -150,6 +152,7 @@
                                  Handle<mirror::DexCache> dex_cache,
                                  Handle<mirror::ClassLoader> class_loader,
                                  const DexFile::ClassDef* class_def,
+                                 CompilerCallbacks* callbacks,
                                  bool allow_soft_failures,
                                  bool log_hard_failures,
                                  std::string* error)
@@ -216,16 +219,34 @@
     return can_load_classes_;
   }
 
-  MethodVerifier(Thread* self, const DexFile* dex_file, Handle<mirror::DexCache> dex_cache,
-                 Handle<mirror::ClassLoader> class_loader, const DexFile::ClassDef* class_def,
-                 const DexFile::CodeItem* code_item, uint32_t method_idx,
+  MethodVerifier(Thread* self,
+                 const DexFile* dex_file,
+                 Handle<mirror::DexCache> dex_cache,
+                 Handle<mirror::ClassLoader> class_loader,
+                 const DexFile::ClassDef* class_def,
+                 const DexFile::CodeItem* code_item,
+                 uint32_t method_idx,
                  ArtMethod* method,
-                 uint32_t access_flags, bool can_load_classes, bool allow_soft_failures,
-                 bool need_precise_constants, bool allow_thread_suspension)
+                 uint32_t access_flags,
+                 bool can_load_classes,
+                 bool allow_soft_failures,
+                 bool need_precise_constants,
+                 bool allow_thread_suspension)
           SHARED_REQUIRES(Locks::mutator_lock_)
-      : MethodVerifier(self, dex_file, dex_cache, class_loader, class_def, code_item, method_idx,
-                       method, access_flags, can_load_classes, allow_soft_failures,
-                       need_precise_constants, false, allow_thread_suspension) {}
+      : MethodVerifier(self,
+                       dex_file,
+                       dex_cache,
+                       class_loader,
+                       class_def,
+                       code_item,
+                       method_idx,
+                       method,
+                       access_flags,
+                       can_load_classes,
+                       allow_soft_failures,
+                       need_precise_constants,
+                       false,
+                       allow_thread_suspension) {}
 
   ~MethodVerifier();
 
@@ -299,12 +320,20 @@
   }
 
   // Private constructor for dumping.
-  MethodVerifier(Thread* self, const DexFile* dex_file, Handle<mirror::DexCache> dex_cache,
-                 Handle<mirror::ClassLoader> class_loader, const DexFile::ClassDef* class_def,
-                 const DexFile::CodeItem* code_item, uint32_t method_idx,
-                 ArtMethod* method, uint32_t access_flags,
-                 bool can_load_classes, bool allow_soft_failures, bool need_precise_constants,
-                 bool verify_to_dump, bool allow_thread_suspension)
+  MethodVerifier(Thread* self,
+                 const DexFile* dex_file,
+                 Handle<mirror::DexCache> dex_cache,
+                 Handle<mirror::ClassLoader> class_loader,
+                 const DexFile::ClassDef* class_def,
+                 const DexFile::CodeItem* code_item,
+                 uint32_t method_idx,
+                 ArtMethod* method,
+                 uint32_t access_flags,
+                 bool can_load_classes,
+                 bool allow_soft_failures,
+                 bool need_precise_constants,
+                 bool verify_to_dump,
+                 bool allow_thread_suspension)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Adds the given string to the beginning of the last failure message.
@@ -323,6 +352,7 @@
                             ClassDataItemIterator* it,
                             Handle<mirror::DexCache> dex_cache,
                             Handle<mirror::ClassLoader> class_loader,
+                            CompilerCallbacks* callbacks,
                             bool allow_soft_failures,
                             bool log_hard_failures,
                             bool need_precise_constants,
@@ -350,6 +380,7 @@
                                   const DexFile::CodeItem* code_item,
                                   ArtMethod* method,
                                   uint32_t method_access_flags,
+                                  CompilerCallbacks* callbacks,
                                   bool allow_soft_failures,
                                   bool log_hard_failures,
                                   bool need_precise_constants,
diff --git a/runtime/verifier/method_verifier_test.cc b/runtime/verifier/method_verifier_test.cc
index c4123d5..946f842 100644
--- a/runtime/verifier/method_verifier_test.cc
+++ b/runtime/verifier/method_verifier_test.cc
@@ -37,8 +37,8 @@
 
     // Verify the class
     std::string error_msg;
-    ASSERT_TRUE(MethodVerifier::VerifyClass(self, klass, true, true, &error_msg) == MethodVerifier::kNoFailure)
-        << error_msg;
+    ASSERT_TRUE(MethodVerifier::VerifyClass(self, klass, nullptr, true, true, &error_msg)
+                    == MethodVerifier::kNoFailure) << error_msg;
   }
 
   void VerifyDexFile(const DexFile& dex)
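With the new parameter, callers hand CompilerCallbacks to the verifier explicitly instead of the verifier reaching into Runtime::Current(); tests (as above) and non-compiling runtimes pass nullptr. A hedged sketch of a call site, where only the parameter list comes from this change:

    std::string error_msg;
    verifier::MethodVerifier::FailureKind result =
        verifier::MethodVerifier::VerifyClass(self,
                                              klass,
                                              callbacks,  // nullptr outside compilation
                                              /* allow_soft_failures */ true,
                                              /* log_hard_failures */ true,
                                              &error_msg);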
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index e2c3afb..8300921 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -140,6 +140,7 @@
 jfieldID WellKnownClasses::java_lang_Thread_uncaughtHandler;
 jfieldID WellKnownClasses::java_lang_Thread_nativePeer;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_groups;
+jfieldID WellKnownClasses::java_lang_ThreadGroup_ngroups;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_mainThreadGroup;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_name;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_parent;
@@ -268,7 +269,7 @@
   java_lang_Thread_init = CacheMethod(env, java_lang_Thread, false, "<init>", "(Ljava/lang/ThreadGroup;Ljava/lang/String;IZ)V");
   java_lang_Thread_run = CacheMethod(env, java_lang_Thread, false, "run", "()V");
   java_lang_Thread__UncaughtExceptionHandler_uncaughtException = CacheMethod(env, java_lang_Thread__UncaughtExceptionHandler, false, "uncaughtException", "(Ljava/lang/Thread;Ljava/lang/Throwable;)V");
-  java_lang_ThreadGroup_removeThread = CacheMethod(env, java_lang_ThreadGroup, false, "removeThread", "(Ljava/lang/Thread;)V");
+  java_lang_ThreadGroup_removeThread = CacheMethod(env, java_lang_ThreadGroup, false, "threadTerminated", "(Ljava/lang/Thread;)V");
   java_nio_DirectByteBuffer_init = CacheMethod(env, java_nio_DirectByteBuffer, false, "<init>", "(JI)V");
   libcore_reflect_AnnotationFactory_createAnnotation = CacheMethod(env, libcore_reflect_AnnotationFactory, true, "createAnnotation", "(Ljava/lang/Class;[Llibcore/reflect/AnnotationMember;)Ljava/lang/annotation/Annotation;");
   libcore_reflect_AnnotationMember_init = CacheMethod(env, libcore_reflect_AnnotationMember, false, "<init>", "(Ljava/lang/String;Ljava/lang/Object;Ljava/lang/Class;Ljava/lang/reflect/Method;)V");
@@ -340,9 +341,10 @@
   java_lang_Thread_lock = CacheField(env, java_lang_Thread, false, "lock", "Ljava/lang/Object;");
   java_lang_Thread_name = CacheField(env, java_lang_Thread, false, "name", "Ljava/lang/String;");
   java_lang_Thread_priority = CacheField(env, java_lang_Thread, false, "priority", "I");
-  java_lang_Thread_uncaughtHandler = CacheField(env, java_lang_Thread, false, "uncaughtHandler", "Ljava/lang/Thread$UncaughtExceptionHandler;");
+  java_lang_Thread_uncaughtHandler = CacheField(env, java_lang_Thread, false, "uncaughtExceptionHandler", "Ljava/lang/Thread$UncaughtExceptionHandler;");
   java_lang_Thread_nativePeer = CacheField(env, java_lang_Thread, false, "nativePeer", "J");
-  java_lang_ThreadGroup_groups = CacheField(env, java_lang_ThreadGroup, false, "groups", "Ljava/util/List;");
+  java_lang_ThreadGroup_groups = CacheField(env, java_lang_ThreadGroup, false, "groups", "[Ljava/lang/ThreadGroup;");
+  java_lang_ThreadGroup_ngroups = CacheField(env, java_lang_ThreadGroup, false, "ngroups", "I");
   java_lang_ThreadGroup_mainThreadGroup = CacheField(env, java_lang_ThreadGroup, true, "mainThreadGroup", "Ljava/lang/ThreadGroup;");
   java_lang_ThreadGroup_name = CacheField(env, java_lang_ThreadGroup, false, "name", "Ljava/lang/String;");
   java_lang_ThreadGroup_parent = CacheField(env, java_lang_ThreadGroup, false, "parent", "Ljava/lang/ThreadGroup;");
@@ -350,13 +352,13 @@
   java_lang_Throwable_cause = CacheField(env, java_lang_Throwable, false, "cause", "Ljava/lang/Throwable;");
   java_lang_Throwable_detailMessage = CacheField(env, java_lang_Throwable, false, "detailMessage", "Ljava/lang/String;");
   java_lang_Throwable_stackTrace = CacheField(env, java_lang_Throwable, false, "stackTrace", "[Ljava/lang/StackTraceElement;");
-  java_lang_Throwable_stackState = CacheField(env, java_lang_Throwable, false, "stackState", "Ljava/lang/Object;");
+  java_lang_Throwable_stackState = CacheField(env, java_lang_Throwable, false, "backtrace", "Ljava/lang/Object;");
   java_lang_Throwable_suppressedExceptions = CacheField(env, java_lang_Throwable, false, "suppressedExceptions", "Ljava/util/List;");
   java_lang_reflect_AbstractMethod_artMethod = CacheField(env, java_lang_reflect_AbstractMethod, false, "artMethod", "J");
   java_lang_reflect_Proxy_h = CacheField(env, java_lang_reflect_Proxy, false, "h", "Ljava/lang/reflect/InvocationHandler;");
   java_nio_DirectByteBuffer_capacity = CacheField(env, java_nio_DirectByteBuffer, false, "capacity", "I");
-  java_nio_DirectByteBuffer_effectiveDirectAddress = CacheField(env, java_nio_DirectByteBuffer, false, "effectiveDirectAddress", "J");
-  java_util_ArrayList_array = CacheField(env, java_util_ArrayList, false, "array", "[Ljava/lang/Object;");
+  java_nio_DirectByteBuffer_effectiveDirectAddress = CacheField(env, java_nio_DirectByteBuffer, false, "address", "J");
+  java_util_ArrayList_array = CacheField(env, java_util_ArrayList, false, "elementData", "[Ljava/lang/Object;");
   java_util_ArrayList_size = CacheField(env, java_util_ArrayList, false, "size", "I");
   java_util_Collections_EMPTY_LIST = CacheField(env, java_util_Collections, true, "EMPTY_LIST", "Ljava/util/List;");
   libcore_util_EmptyArray_STACK_TRACE_ELEMENT = CacheField(env, libcore_util_EmptyArray, true, "STACK_TRACE_ELEMENT", "[Ljava/lang/StackTraceElement;");
@@ -379,7 +381,10 @@
 
 void WellKnownClasses::LateInit(JNIEnv* env) {
   ScopedLocalRef<jclass> java_lang_Runtime(env, env->FindClass("java/lang/Runtime"));
-  java_lang_Runtime_nativeLoad = CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad", "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;");
+  java_lang_Runtime_nativeLoad =
+      CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad",
+                  "(Ljava/lang/String;Ljava/lang/ClassLoader;ZLjava/lang/String;Ljava/lang/String;)"
+                      "Ljava/lang/String;");
 }
 
 mirror::Class* WellKnownClasses::ToClass(jclass global_jclass) {
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index c856291..55158a7 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -153,6 +153,7 @@
   static jfieldID java_lang_Thread_uncaughtHandler;
   static jfieldID java_lang_Thread_nativePeer;
   static jfieldID java_lang_ThreadGroup_groups;
+  static jfieldID java_lang_ThreadGroup_ngroups;
   static jfieldID java_lang_ThreadGroup_mainThreadGroup;
   static jfieldID java_lang_ThreadGroup_name;
   static jfieldID java_lang_ThreadGroup_parent;
diff --git a/runtime/zip_archive_test.cc b/runtime/zip_archive_test.cc
index aded30c..4fc7ee2 100644
--- a/runtime/zip_archive_test.cc
+++ b/runtime/zip_archive_test.cc
@@ -32,7 +32,7 @@
 
 TEST_F(ZipArchiveTest, FindAndExtract) {
   std::string error_msg;
-  std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(GetLibCoreDexFileName().c_str(), &error_msg));
+  std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(GetLibCoreDexFileNames()[0].c_str(), &error_msg));
   ASSERT_TRUE(zip_archive.get() != nullptr) << error_msg;
   ASSERT_TRUE(error_msg.empty());
   std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find("classes.dex", &error_msg));
diff --git a/test/003-omnibus-opcodes/expected.txt b/test/003-omnibus-opcodes/expected.txt
index b591a7a..ee25ec1 100644
--- a/test/003-omnibus-opcodes/expected.txt
+++ b/test/003-omnibus-opcodes/expected.txt
@@ -31,15 +31,7 @@
 FloatMath.checkConvI
 FloatMath.checkConvL
 FloatMath.checkConvF
- 0: -2.0054409E9
- 1: -8.613303E18
- 2: -3.1415927
--2.0054409E9, -8.6133031E18, -3.1415927
 FloatMath.checkConvD
- 0: -2.005440939E9
- 1: -8.613303245920329E18
- 2: 123.45600128173828
--2.005440939E9, -8.6133032459203287E18, 123.4560012817382
 FloatMath.checkConsts
 FloatMath.jlmTests
 IntMath.testIntCompare
diff --git a/test/003-omnibus-opcodes/src/FloatMath.java b/test/003-omnibus-opcodes/src/FloatMath.java
index a0bc9f4..96befe9 100644
--- a/test/003-omnibus-opcodes/src/FloatMath.java
+++ b/test/003-omnibus-opcodes/src/FloatMath.java
@@ -245,10 +245,9 @@
     }
     static void checkConvF(float[] results) {
         System.out.println("FloatMath.checkConvF");
-        // TODO: Main.assertTrue values
-        for (int i = 0; i < results.length; i++)
-            System.out.println(" " + i + ": " + results[i]);
-        System.out.println("-2.0054409E9, -8.6133031E18, -3.1415927");
+        Main.assertTrue(results[0] == -2.0054409E9f);
+        Main.assertTrue(results[1] == -8.613303E18f);
+        Main.assertTrue(results[2] == -3.1415927f);
     }
 
     static double[] convD(int i, long l, float f) {
@@ -260,10 +259,9 @@
     }
     static void checkConvD(double[] results) {
         System.out.println("FloatMath.checkConvD");
-        // TODO: Main.assertTrue values
-        for (int i = 0; i < results.length; i++)
-            System.out.println(" " + i + ": " + results[i]);
-        System.out.println("-2.005440939E9, -8.6133032459203287E18, 123.4560012817382");
+        Main.assertTrue(results[0] == -2.005440939E9);
+        Main.assertTrue(results[1] == -8.6133032459203287E18);
+        Main.assertTrue(results[2] == 123.45600128173828);
     }
 
     static void checkConsts() {
diff --git a/test/005-annotations/build b/test/005-annotations/build
index 3f00a1a..057b351 100644
--- a/test/005-annotations/build
+++ b/test/005-annotations/build
@@ -21,6 +21,8 @@
 
 # android.test.anno.MissingAnnotation is available at compile time...
 ${JAVAC} -d classes `find src -name '*.java'`
+# Overwrite RenamedEnum with the src2 version.
+${JAVAC} -d classes `find src2 -name '*.java'`
 
 # ...but not at run time.
 rm 'classes/android/test/anno/MissingAnnotation.class'
diff --git a/test/005-annotations/expected.txt b/test/005-annotations/expected.txt
index e1c3dad..3d9fd8b 100644
--- a/test/005-annotations/expected.txt
+++ b/test/005-annotations/expected.txt
@@ -89,7 +89,7 @@
   annotations on FIELD int android.test.anno.FullyNoted.mBar:
     @android.test.anno.AnnoFancyField(nombre=fubar)
       interface android.test.anno.AnnoFancyField
-    aff: @android.test.anno.AnnoFancyField(nombre=fubar) / class $Proxy13
+    aff: @android.test.anno.AnnoFancyField(nombre=fubar) / true
     --> nombre is 'fubar'
 
 SimplyNoted.get(AnnoSimpleType) = @android.test.anno.AnnoSimpleType()
@@ -108,3 +108,4 @@
 
 Get annotation with missing class should not throw
 Got expected TypeNotPresentException
+Got expected NoSuchFieldError
diff --git a/test/005-annotations/src/android/test/anno/AnnoRenamedEnumMethod.java b/test/005-annotations/src/android/test/anno/AnnoRenamedEnumMethod.java
new file mode 100644
index 0000000..7a15c64
--- /dev/null
+++ b/test/005-annotations/src/android/test/anno/AnnoRenamedEnumMethod.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+import java.lang.annotation.*;
+
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+
+public @interface AnnoRenamedEnumMethod {
+    RenamedEnumClass.RenamedEnum renamed() default RenamedEnumClass.RenamedEnum.FOO;
+}
diff --git a/test/005-annotations/src/android/test/anno/RenamedEnumClass.java b/test/005-annotations/src/android/test/anno/RenamedEnumClass.java
new file mode 100644
index 0000000..cfba819
--- /dev/null
+++ b/test/005-annotations/src/android/test/anno/RenamedEnumClass.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+import java.lang.annotation.*;
+
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+
+public @interface RenamedEnumClass {
+    enum RenamedEnum { FOO, BAR };
+}
diff --git a/test/005-annotations/src/android/test/anno/RenamedNoted.java b/test/005-annotations/src/android/test/anno/RenamedNoted.java
new file mode 100644
index 0000000..aae3a3f
--- /dev/null
+++ b/test/005-annotations/src/android/test/anno/RenamedNoted.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+public class RenamedNoted {
+    @AnnoRenamedEnumMethod(renamed=RenamedEnumClass.RenamedEnum.BAR)
+    public int bar() {
+        return 0;
+    }
+}
diff --git a/test/005-annotations/src/android/test/anno/TestAnnotations.java b/test/005-annotations/src/android/test/anno/TestAnnotations.java
index 7b74a73..bc89f16 100644
--- a/test/005-annotations/src/android/test/anno/TestAnnotations.java
+++ b/test/005-annotations/src/android/test/anno/TestAnnotations.java
@@ -1,9 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package android.test.anno;
 
 import java.lang.annotation.Annotation;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.Field;
 import java.lang.reflect.Method;
+import java.lang.reflect.Proxy;
 import java.util.TreeMap;
 
 public class TestAnnotations {
@@ -65,7 +82,7 @@
             AnnoFancyField aff;
             aff = (AnnoFancyField) f.getAnnotation(AnnoFancyField.class);
             if (aff != null) {
-                System.out.println("    aff: " + aff + " / " + aff.getClass());
+                System.out.println("    aff: " + aff + " / " + Proxy.isProxyClass(aff.getClass()));
                 System.out.println("    --> nombre is '" + aff.nombre() + "'");
             }
         }
@@ -199,5 +216,15 @@
         } catch (TypeNotPresentException expected) {
             System.out.println("Got expected TypeNotPresentException");
         }
+
+        // Test renamed enums.
+        try {
+            for (Method m: RenamedNoted.class.getDeclaredMethods()) {
+                Annotation[] annos = m.getDeclaredAnnotations();
+                System.out.println("  annotations on METH " + m + ":");
+            }
+        } catch (NoSuchFieldError expected) {
+            System.out.println("Got expected NoSuchFieldError");
+        }
     }
 }
diff --git a/test/005-annotations/src2/android/test/anno/RenamedEnumClass.java b/test/005-annotations/src2/android/test/anno/RenamedEnumClass.java
new file mode 100644
index 0000000..5a2fe36
--- /dev/null
+++ b/test/005-annotations/src2/android/test/anno/RenamedEnumClass.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+import java.lang.annotation.*;
+
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+
+public @interface RenamedEnumClass {
+    enum RenamedEnum { FOOBAR };
+}
diff --git a/test/008-exceptions/src/Main.java b/test/008-exceptions/src/Main.java
index 9e3477a..b8231f1 100644
--- a/test/008-exceptions/src/Main.java
+++ b/test/008-exceptions/src/Main.java
@@ -60,7 +60,7 @@
         } catch (NullPointerException npe) {
             System.out.print("Got an NPE: ");
             System.out.println(npe.getMessage());
-            npe.printStackTrace();
+            npe.printStackTrace(System.out);
         }
     }
     public static void main (String args[]) {
@@ -103,7 +103,7 @@
                 System.out.println(e.getCause());
             }
         } catch (Exception error) {
-            error.printStackTrace();
+            error.printStackTrace(System.out);
         }
     }
 
@@ -126,7 +126,7 @@
                 System.out.println(e.getCause());
             }
         } catch (Exception error) {
-            error.printStackTrace();
+            error.printStackTrace(System.out);
         }
     }
 }
diff --git a/test/031-class-attributes/expected.txt b/test/031-class-attributes/expected.txt
index de99872..72656ae 100644
--- a/test/031-class-attributes/expected.txt
+++ b/test/031-class-attributes/expected.txt
@@ -84,7 +84,7 @@
   enclosingCon: null
   enclosingMeth: null
   modifiers: 1
-  package: package otherpackage
+  package: package otherpackage, Unknown, version 0.0
   declaredClasses: [0]
   member classes: [0]
   isAnnotation: false
diff --git a/test/034-call-null/expected.txt b/test/034-call-null/expected.txt
index 343226f..4e0281e 100644
--- a/test/034-call-null/expected.txt
+++ b/test/034-call-null/expected.txt
@@ -1,2 +1,2 @@
-java.lang.NullPointerException: Attempt to invoke direct method 'void Main.doStuff(int, int[][], java.lang.String, java.lang.String[][])' on a null object reference
+Exception in thread "main" java.lang.NullPointerException: Attempt to invoke direct method 'void Main.doStuff(int, int[][], java.lang.String, java.lang.String[][])' on a null object reference
 	at Main.main(Main.java:26)
diff --git a/test/038-inner-null/expected.txt b/test/038-inner-null/expected.txt
index ba411f0..2e92564 100644
--- a/test/038-inner-null/expected.txt
+++ b/test/038-inner-null/expected.txt
@@ -1,4 +1,4 @@
 new Special()
-java.lang.NullPointerException: Attempt to invoke virtual method 'void Main$Blort.repaint()' on a null object reference
+Exception in thread "main" java.lang.NullPointerException: Attempt to invoke virtual method 'void Main$Blort.repaint()' on a null object reference
 	at Main$Special.callInner(Main.java:31)
 	at Main.main(Main.java:20)
diff --git a/test/042-new-instance/expected.txt b/test/042-new-instance/expected.txt
index 7d843d1..c5de313 100644
--- a/test/042-new-instance/expected.txt
+++ b/test/042-new-instance/expected.txt
@@ -9,3 +9,4 @@
 Cons got expected PackageAccess complaint
 Cons got expected InstantationException
 Cons got expected PackageAccess2 complaint
+Cons ConstructorAccess succeeded
diff --git a/test/042-new-instance/src/Main.java b/test/042-new-instance/src/Main.java
index b0a5fd4..8cd6b2e 100644
--- a/test/042-new-instance/src/Main.java
+++ b/test/042-new-instance/src/Main.java
@@ -156,6 +156,14 @@
             ex.printStackTrace();
         }
 
+        // should succeed
+        try {
+            otherpackage.ConstructorAccess.newConstructorInstance();
+            System.out.println("Cons ConstructorAccess succeeded");
+        } catch (Exception ex) {
+            System.err.println("Cons ConstructorAccess failed");
+            ex.printStackTrace();
+        }
     }
 
     class InnerClass {
@@ -173,7 +181,6 @@
     public LocalClass2() {}
 }
 
-
 class LocalClass3 {
     public static void main() {
         try {
diff --git a/test/042-new-instance/src/otherpackage/ConstructorAccess.java b/test/042-new-instance/src/otherpackage/ConstructorAccess.java
new file mode 100644
index 0000000..a74e9a0
--- /dev/null
+++ b/test/042-new-instance/src/otherpackage/ConstructorAccess.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package otherpackage;
+
+import java.lang.reflect.Constructor;
+
+public class ConstructorAccess {
+
+    static class Inner {
+      Inner() {}
+    }
+
+    // Test for regression in b/25817515. The inner class constructor should
+    // be accessible from this static method, but if we overshoot and check
+    // accessibility using the frame below (in the Main class), we will see an
+    // IllegalAccessException from #newInstance.
+    static public void newConstructorInstance() throws Exception {
+      Class c = Inner.class;
+      Constructor cons = c.getDeclaredConstructor((Class[]) null);
+      Object obj = cons.newInstance();
+    }
+}
diff --git a/test/044-proxy/expected.txt b/test/044-proxy/expected.txt
index 052c8fa..be7023e 100644
--- a/test/044-proxy/expected.txt
+++ b/test/044-proxy/expected.txt
@@ -42,7 +42,7 @@
  (no args)
 --- blob
 Success: method blob res=mix
-$Proxy1.getTrace null:-1
+$PROXY_CLASS_NAME0$.getTrace null:-1
 Invoke public abstract void Shapes.upChuck()
  (no args)
 Got expected ioobe
@@ -51,7 +51,7 @@
 Got expected ie
 
 Proxy interfaces: [interface Quads, interface Colors, interface Trace]
-Proxy methods: [public final java.lang.String $Proxy1.blob(), public final double $Proxy1.blue(int), public final R0a $Proxy1.checkMe(), public final R0aa $Proxy1.checkMe(), public final R0base $Proxy1.checkMe(), public final void $Proxy1.circle(int), public final boolean $Proxy1.equals(java.lang.Object), public final void $Proxy1.getTrace(), public final int $Proxy1.green(double), public final int $Proxy1.hashCode(), public final int $Proxy1.mauve(java.lang.String), public final int $Proxy1.rectangle(int,int), public final int $Proxy1.red(float), public final int $Proxy1.square(int,int), public final java.lang.String $Proxy1.toString(), public final int $Proxy1.trapezoid(int,double,int), public final void $Proxy1.upCheck() throws java.lang.InterruptedException, public final void $Proxy1.upChuck()]
+Proxy methods: [public final java.lang.String $PROXY_CLASS_NAME0$.blob(), public final double $PROXY_CLASS_NAME0$.blue(int), public final R0a $PROXY_CLASS_NAME0$.checkMe(), public final R0aa $PROXY_CLASS_NAME0$.checkMe(), public final R0base $PROXY_CLASS_NAME0$.checkMe(), public final void $PROXY_CLASS_NAME0$.circle(int), public final boolean $PROXY_CLASS_NAME0$.equals(java.lang.Object), public final void $PROXY_CLASS_NAME0$.getTrace(), public final int $PROXY_CLASS_NAME0$.green(double), public final int $PROXY_CLASS_NAME0$.hashCode(), public final int $PROXY_CLASS_NAME0$.mauve(java.lang.String), public final int $PROXY_CLASS_NAME0$.rectangle(int,int), public final int $PROXY_CLASS_NAME0$.red(float), public final int $PROXY_CLASS_NAME0$.square(int,int), public final java.lang.String $PROXY_CLASS_NAME0$.toString(), public final int $PROXY_CLASS_NAME0$.trapezoid(int,double,int), public final void $PROXY_CLASS_NAME0$.upCheck() throws java.lang.InterruptedException, public final void $PROXY_CLASS_NAME0$.upChuck()]
 Decl annos: []
 Param annos (0) : []
 Modifiers: 17
@@ -84,7 +84,7 @@
 Invoke public abstract void InterfaceW1.bothThrowBase() throws BaseException,SubException,SubSubException
  (no args)
 Got expected exception
-Proxy methods: [public final boolean $Proxy3.equals(java.lang.Object), public final java.lang.Object $Proxy3.foo(), public final java.lang.String $Proxy3.foo(), public final int $Proxy3.hashCode(), public final java.lang.String $Proxy3.toString()]
+Proxy methods: [public final boolean $PROXY_CLASS_NAME1$.equals(java.lang.Object), public final java.lang.Object $PROXY_CLASS_NAME1$.foo(), public final java.lang.String $PROXY_CLASS_NAME1$.foo(), public final int $PROXY_CLASS_NAME1$.hashCode(), public final java.lang.String $PROXY_CLASS_NAME1$.toString()]
 Invocation of public abstract java.lang.String NarrowingTest$I2.foo()
 Invoking foo using I2 type: hello
 Invocation of public abstract java.lang.Object NarrowingTest$I1.foo()
diff --git a/test/044-proxy/src/BasicTest.java b/test/044-proxy/src/BasicTest.java
index 1573297..445a6cc 100644
--- a/test/044-proxy/src/BasicTest.java
+++ b/test/044-proxy/src/BasicTest.java
@@ -84,7 +84,8 @@
         });
         System.out.println("Proxy interfaces: " +
             Arrays.deepToString(proxy.getClass().getInterfaces()));
-        System.out.println("Proxy methods: " + Arrays.deepToString(methods));
+        System.out.println("Proxy methods: " +
+            Main.replaceProxyClassNamesForOutput(Arrays.deepToString(methods)));
         Method meth = methods[methods.length -1];
         System.out.println("Decl annos: " + Arrays.deepToString(meth.getDeclaredAnnotations()));
         Annotation[][] paramAnnos = meth.getParameterAnnotations();
@@ -100,6 +101,7 @@
         /* create the proxy class */
         Class proxyClass = Proxy.getProxyClass(Shapes.class.getClassLoader(),
                             new Class[] { Quads.class, Colors.class, Trace.class });
+        Main.registerProxyClassName(proxyClass.getCanonicalName());
 
         /* create a proxy object, passing the handler object in */
         Object proxy = null;
@@ -262,7 +264,8 @@
             for (int i = 0; i < stackTrace.length; i++) {
                 StackTraceElement ste = stackTrace[i];
                 if (ste.getMethodName().equals("getTrace")) {
-                  System.out.println(ste.getClassName() + "." + ste.getMethodName() + " " +
+                  String outputClassName = Main.replaceProxyClassNamesForOutput(ste.getClassName());
+                  System.out.println(outputClassName + "." + ste.getMethodName() + " " +
                                      ste.getFileName() + ":" + ste.getLineNumber());
                 }
             }
@@ -276,7 +279,8 @@
             for (int i = 0; i < stackTrace.length; i++) {
                 StackTraceElement ste = stackTrace[i];
                 if (ste.getMethodName().equals("getTrace")) {
-                  System.out.println(ste.getClassName() + "." + ste.getMethodName() + " " +
+                  String outputClassName = Main.replaceProxyClassNamesForOutput(ste.getClassName());
+                  System.out.println(outputClassName + "." + ste.getMethodName() + " " +
                                      ste.getFileName() + ":" + ste.getLineNumber());
                 }
             }
diff --git a/test/044-proxy/src/Main.java b/test/044-proxy/src/Main.java
index 05e8e5b..1f23b95 100644
--- a/test/044-proxy/src/Main.java
+++ b/test/044-proxy/src/Main.java
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+import java.util.HashMap;
+
 /**
  * Test java.lang.reflect.Proxy
  */
@@ -30,4 +32,24 @@
         FloatSelect.main(null);
         NativeProxy.main(args);
     }
+
+    // The following code maps the actual proxy class names (e.g. $Proxy2) to their test output
+    // names (e.g. $PROXY_CLASS_NAME1$). This avoids flaky test failures due to potentially
+    // nondeterministic proxy class naming.
+
+    public static void registerProxyClassName(String proxyClassName) {
+        proxyClassNameMap.put(proxyClassName,
+                              "$PROXY_CLASS_NAME" + (uniqueTestProxyClassNum++) + "$");
+    }
+
+    public static String replaceProxyClassNamesForOutput(String str) {
+        for (String key : proxyClassNameMap.keySet()) {
+            str = str.replace(key, proxyClassNameMap.get(key));
+        }
+        return str;
+    }
+
+    private static final HashMap<String, String> proxyClassNameMap = new HashMap<String, String>();
+
+    private static int uniqueTestProxyClassNum = 0;
 }
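
Note on the renaming scheme above: proxy classes get VM-assigned names such as
$Proxy2, and the assignment order can vary between runs, so the test substitutes
stable placeholders before printing. A minimal usage sketch (the Runnable
interface here is illustrative, not part of the test):

    // Hypothetical use of the helpers in Main.
    Class<?> proxyClass = java.lang.reflect.Proxy.getProxyClass(
        Main.class.getClassLoader(), new Class<?>[] { Runnable.class });
    Main.registerProxyClassName(proxyClass.getCanonicalName());
    // Output mentioning the real name (e.g. "$Proxy0") is rewritten to the
    // registered placeholder (e.g. "$PROXY_CLASS_NAME0$") before it reaches
    // expected.txt, removing the dependency on the VM's naming order.
    System.out.println(
        Main.replaceProxyClassNamesForOutput(proxyClass.getCanonicalName()));
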
diff --git a/test/044-proxy/src/NarrowingTest.java b/test/044-proxy/src/NarrowingTest.java
index 3b94b76..5b80d72 100644
--- a/test/044-proxy/src/NarrowingTest.java
+++ b/test/044-proxy/src/NarrowingTest.java
@@ -45,9 +45,11 @@
                        }
                    }
                });
+       Main.registerProxyClassName(proxy.getClass().getCanonicalName());
 
        Method[] methods = proxy.getClass().getDeclaredMethods();
-       System.out.println("Proxy methods: " + Arrays.deepToString(methods));
+       System.out.println("Proxy methods: " +
+                          Main.replaceProxyClassNamesForOutput(Arrays.deepToString(methods)));
 
        System.out.println("Invoking foo using I2 type: " + proxy.foo());
 
diff --git a/test/046-reflect/expected.txt b/test/046-reflect/expected.txt
index d657d44..06932b9 100644
--- a/test/046-reflect/expected.txt
+++ b/test/046-reflect/expected.txt
@@ -96,8 +96,8 @@
 got expected exception for Constructor.newInstance
 ReflectTest done!
 public method
-static java.lang.Object java.util.Collections.checkType(java.lang.Object,java.lang.Class) accessible=false
-static java.lang.Object java.util.Collections.checkType(java.lang.Object,java.lang.Class) accessible=true
+private static void java.util.Collections.swap(java.lang.Object[],int,int) accessible=false
+private static void java.util.Collections.swap(java.lang.Object[],int,int) accessible=true
 checkType invoking null
 checkType got expected exception
 calling const-class FieldNoisyInitUser.class
diff --git a/test/046-reflect/src/Main.java b/test/046-reflect/src/Main.java
index 0c90109..67a0d11 100644
--- a/test/046-reflect/src/Main.java
+++ b/test/046-reflect/src/Main.java
@@ -407,12 +407,13 @@
         System.out.println("ReflectTest done!");
     }
 
-    public static void checkType() {
+    public static void checkSwap() {
         Method m;
 
+        final Object[] objects = new Object[2];
         try {
-            m = Collections.class.getDeclaredMethod("checkType",
-                            Object.class, Class.class);
+            m = Collections.class.getDeclaredMethod("swap",
+                            Object[].class, int.class, int.class);
         } catch (NoSuchMethodException nsme) {
             nsme.printStackTrace();
             return;
@@ -421,7 +422,7 @@
         m.setAccessible(true);
         System.out.println(m + " accessible=" + m.isAccessible());
         try {
-            m.invoke(null, new Object(), Object.class);
+            m.invoke(null, objects, 0, 1);
         } catch (IllegalAccessException iae) {
             iae.printStackTrace();
             return;
@@ -432,7 +433,7 @@
 
         try {
             String s = "Should be ignored";
-            m.invoke(s, new Object(), Object.class);
+            m.invoke(s, objects, 0, 1);
         } catch (IllegalAccessException iae) {
             iae.printStackTrace();
             return;
@@ -443,7 +444,8 @@
 
         try {
             System.out.println("checkType invoking null");
-            m.invoke(null, new Object(), int.class);
+            // Trigger an NPE at the target.
+            m.invoke(null, null, 0, 1);
             System.out.println("ERROR: should throw InvocationTargetException");
         } catch (InvocationTargetException ite) {
             System.out.println("checkType got expected exception");
@@ -710,27 +712,27 @@
     private static void checkGetDeclaredConstructor() {
         try {
             Method.class.getDeclaredConstructor().setAccessible(true);
-            System.out.print("Didn't get an exception from Method.class.getDeclaredConstructor().setAccessible");
+            System.out.println("Didn't get an exception from Method.class.getDeclaredConstructor().setAccessible");
         } catch (SecurityException e) {
         } catch (NoSuchMethodException e) {
         } catch (Exception e) {
-            System.out.print(e);
+            System.out.println(e);
         }
         try {
             Field.class.getDeclaredConstructor().setAccessible(true);
-            System.out.print("Didn't get an exception from Field.class.getDeclaredConstructor().setAccessible");
+            System.out.println("Didn't get an exception from Field.class.getDeclaredConstructor().setAccessible");
         } catch (SecurityException e) {
         } catch (NoSuchMethodException e) {
         } catch (Exception e) {
-            System.out.print(e);
+            System.out.println(e);
         }
         try {
             Class.class.getDeclaredConstructor().setAccessible(true);
-            System.out.print("Didn't get an exception from Class.class.getDeclaredConstructor().setAccessible");
+            System.out.println("Didn't get an exception from Class.class.getDeclaredConstructor().setAccessible");
         } catch (SecurityException e) {
         } catch (NoSuchMethodException e) {
         } catch (Exception e) {
-            System.out.print(e);
+            System.out.println(e);
         }
     }
 
@@ -744,7 +746,7 @@
 
         checkGetDeclaredConstructor();
         checkAccess();
-        checkType();
+        checkSwap();
         checkClinitForFields();
         checkClinitForMethods();
         checkGeneric();
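
Note: the checkType-to-checkSwap rewrite tracks libcore's move to the
OpenJDK-based java.util.Collections, which has no private static
checkType(Object, Class); the test now reflects on the private static
swap(Object[], int, int) instead. A condensed sketch of the invoke semantics
it exercises (assuming the reflection imports the test already uses):

    Method m = Collections.class.getDeclaredMethod(
        "swap", Object[].class, int.class, int.class);
    m.setAccessible(true);
    Object[] objects = new Object[2];
    m.invoke(null, objects, 0, 1);        // static method: null receiver is fine
    m.invoke("ignored", objects, 0, 1);   // non-null receiver is simply ignored
    try {
        m.invoke(null, null, 0, 1);       // null array NPEs inside swap()...
    } catch (InvocationTargetException expected) {
        // ...and reaches the caller wrapped in InvocationTargetException.
    }
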
diff --git a/test/055-enum-performance/src/Main.java b/test/055-enum-performance/src/Main.java
index d5903af..d6bb211 100644
--- a/test/055-enum-performance/src/Main.java
+++ b/test/055-enum-performance/src/Main.java
@@ -20,7 +20,7 @@
             throw new AssertionError();
         } catch (InvocationTargetException expected) {
             IllegalArgumentException iae = (IllegalArgumentException) expected.getCause();
-            if (!iae.getMessage().equals("class java.lang.String is not an enum type")) {
+            if (!iae.getMessage().equals("class java.lang.String is not an enum type.")) {
                 throw new AssertionError();
             }
         }
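
Note: the one-character change tracks the OpenJDK-based wording of the
IllegalArgumentException raised for a non-enum class, which now ends in a
period; the test compares the message exactly. A standalone sketch that
provokes the same message reflectively (mirroring the test's
InvocationTargetException handling):

    Method valueOf = Enum.class.getDeclaredMethod("valueOf", Class.class, String.class);
    try {
        valueOf.invoke(null, String.class, "anything");
        throw new AssertionError();
    } catch (InvocationTargetException expected) {
        IllegalArgumentException iae = (IllegalArgumentException) expected.getCause();
        // iae.getMessage(): "class java.lang.String is not an enum type."
    }
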
diff --git a/test/063-process-manager/expected.txt b/test/063-process-manager/expected.txt
index 8360239..8c01bf0 100644
--- a/test/063-process-manager/expected.txt
+++ b/test/063-process-manager/expected.txt
@@ -4,12 +4,12 @@
 spawning child
 process manager: RUNNABLE
 child died
-process manager: WAITING
+process manager: TIMED_WAITING
 
 spawning child #2
 spawning child
 process manager: RUNNABLE
 child died
-process manager: WAITING
+process manager: TIMED_WAITING
 
 done!
diff --git a/test/063-process-manager/src/Main.java b/test/063-process-manager/src/Main.java
index 68bf878..e9e522c 100644
--- a/test/063-process-manager/src/Main.java
+++ b/test/063-process-manager/src/Main.java
@@ -30,7 +30,7 @@
                  traces.entrySet()) {
             Thread t = entry.getKey();
             String name = t.getName();
-            if (name.equals("java.lang.ProcessManager")) {
+            if (name.indexOf("process reaper") >= 0) {
                 System.out.println("process manager: " + t.getState());
                 found = true;
             }
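
Note: the old Harmony-era "java.lang.ProcessManager" thread no longer exists;
the OpenJDK-based runtime names its reaper threads with a "process reaper"
substring, and the expected state becomes TIMED_WAITING per the expected.txt
change above. The lookup in isolation:

    // Scan all live threads for the reaper by name substring.
    for (Thread t : Thread.getAllStackTraces().keySet()) {
        if (t.getName().indexOf("process reaper") >= 0) {
            System.out.println("process manager: " + t.getState());
        }
    }
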
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 5913c40..af25d9b 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -803,7 +803,7 @@
     Assert.assertEquals(Math.round(-2.5d), -2l);
     Assert.assertEquals(Math.round(-2.9d), -3l);
     Assert.assertEquals(Math.round(-3.0d), -3l);
-    Assert.assertEquals(Math.round(0.49999999999999994d), 1l);
+    Assert.assertEquals(Math.round(0.49999999999999994d), 0l);
     Assert.assertEquals(Math.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(Math.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(Math.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
@@ -1034,7 +1034,7 @@
     Assert.assertEquals(StrictMath.round(-2.5d), -2l);
     Assert.assertEquals(StrictMath.round(-2.9d), -3l);
     Assert.assertEquals(StrictMath.round(-3.0d), -3l);
-    Assert.assertEquals(StrictMath.round(0.49999999999999994d), 1l);
+    Assert.assertEquals(StrictMath.round(0.49999999999999994d), 0l);
     Assert.assertEquals(StrictMath.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(StrictMath.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
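
Note: the flipped expectation is correct rounding at the boundary.
0.49999999999999994 is the largest double strictly below 0.5, so the nearest
long is 0. The classic floor(x + 0.5) implementation gets it wrong because
x + 0.5 is not representable and rounds up to exactly 1.0; OpenJDK corrected
Math.round (tracked upstream as JDK-8010430), and ART's intrinsics must match.
A worked check:

    double x = 0.49999999999999994;           // Math.nextDown(0.5) on Java 8+
    System.out.println(Math.floor(x + 0.5));  // 1.0: the sum rounds up to 1.0
    System.out.println(Math.round(x));        // 0 with the corrected rounding
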
diff --git a/test/100-reflect2/expected.txt b/test/100-reflect2/expected.txt
index c932761..e4988c9 100644
--- a/test/100-reflect2/expected.txt
+++ b/test/100-reflect2/expected.txt
@@ -31,9 +31,9 @@
 30 (class java.lang.Integer)
 62 (class java.lang.Long)
 14 (class java.lang.Short)
-[public java.lang.String(), java.lang.String(int,int,char[]), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int)]
-[private final int java.lang.String.count, private int java.lang.String.hashCode, private static final char[] java.lang.String.ASCII, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER, private static final char java.lang.String.REPLACEMENT_CHAR, private static final long java.lang.String.serialVersionUID]
-[public native char java.lang.String.charAt(int), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public native int java.lang.String.compareTo(java.lang.String), public int java.lang.String.compareToIgnoreCase(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public void java.lang.String.getBytes(int,int,byte[],int), public [B java.lang.String.getBytes(), public [B java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public [B java.lang.String.getBytes(java.nio.charset.Charset), public void java.lang.String.getChars(int,int,char[],int), native void java.lang.String.getCharsNoCheck(int,int,char[],int), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public native java.lang.String java.lang.String.intern(), public boolean java.lang.String.isEmpty(), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public boolean java.lang.String.matches(java.lang.String), public int java.lang.String.offsetByCodePoints(int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), native void java.lang.String.setCharAt(int,char), public [Ljava.lang.String; java.lang.String.split(java.lang.String), public [Ljava.lang.String; java.lang.String.split(java.lang.String,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public native [C java.lang.String.toCharArray(), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), private 
java.lang.StringIndexOutOfBoundsException java.lang.String.failedBoundsCheck(int,int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), private char java.lang.String.foldCase(char), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), private java.lang.StringIndexOutOfBoundsException java.lang.String.indexAndLength(int), private static int java.lang.String.indexOf(java.lang.String,java.lang.String,int,int,char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.startEndAndLength(int,int), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(long), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int)]
+[java.lang.String(int,int,char[]), public java.lang.String(), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder)]
+[private final int java.lang.String.count, private int java.lang.String.hash, private static final java.io.ObjectStreamField[] java.lang.String.serialPersistentFields, private static final long java.lang.String.serialVersionUID, private static int java.lang.String.HASHING_SEED, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER]
+[int java.lang.String.hash32(), native void java.lang.String.getCharsNoCheck(int,int,char[],int), native void java.lang.String.setCharAt(int,char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), private static int java.lang.String.getHashingSeed(), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public byte[] java.lang.String.getBytes(), public byte[] java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public byte[] java.lang.String.getBytes(java.nio.charset.Charset), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public java.lang.String[] java.lang.String.split(java.lang.String), public java.lang.String[] java.lang.String.split(java.lang.String,int), public native char java.lang.String.charAt(int), public native char[] java.lang.String.toCharArray(), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public 
native java.lang.String java.lang.String.intern(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int), static int java.lang.String.indexOf(char[],int,int,char[],int,int,int), static int java.lang.String.indexOf(java.lang.String,java.lang.String,int), static int java.lang.String.lastIndexOf(char[],int,int,char[],int,int,int), static int java.lang.String.lastIndexOf(java.lang.String,java.lang.String,int)]
 []
 [interface java.io.Serializable, interface java.lang.Comparable, interface java.lang.CharSequence]
 0
diff --git a/test/100-reflect2/src/Main.java b/test/100-reflect2/src/Main.java
index bf3a574..1245852 100644
--- a/test/100-reflect2/src/Main.java
+++ b/test/100-reflect2/src/Main.java
@@ -157,10 +157,28 @@
     System.out.println(o + " (" + (o != null ? o.getClass() : "null") + ")");
   }
 
+  /**
+   * Sorts the input array using the comparator and returns the sorted array.
+   */
+  private static Object[] sort(Object[] objects, Comparator<Object> comp) {
+    Arrays.sort(objects, comp);
+    return objects;
+  }
+
   public static void testMethodReflection() throws Exception {
-    System.out.println(Arrays.toString(String.class.getDeclaredConstructors()));
-    System.out.println(Arrays.toString(String.class.getDeclaredFields()));
-    System.out.println(Arrays.toString(String.class.getDeclaredMethods()));
+    Comparator<Object> comp = new Comparator<Object>() {
+      public int compare(Object a, Object b) {
+        return a.toString().compareTo(b.toString());
+      }
+      public boolean equals(Object b) {
+        return this == b;
+      }
+    };
+
+    // Sort the return values by their string values since the order is undefined by the spec.
+    System.out.println(Arrays.toString(sort(String.class.getDeclaredConstructors(), comp)));
+    System.out.println(Arrays.toString(sort(String.class.getDeclaredFields(), comp)));
+    System.out.println(Arrays.toString(sort(String.class.getDeclaredMethods(), comp)));
 
     System.out.println(Arrays.toString(Main.class.getInterfaces()));
     System.out.println(Arrays.toString(String.class.getInterfaces()));
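
Note: getDeclaredConstructors/Fields/Methods return their elements in no
particular order, so the raw Arrays.toString output could differ between runs
or implementations; sorting by toString() makes expected.txt deterministic.
The anonymous Comparator keeps the source on an older language level; on
Java 8+ the same sort could be written as (a sketch, assuming
java.util.Comparator is imported):

    Method[] methods = String.class.getDeclaredMethods();
    Arrays.sort(methods, Comparator.comparing(Object::toString));
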
diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc
index e9946c8..b70ca4f 100644
--- a/test/115-native-bridge/nativebridge.cc
+++ b/test/115-native-bridge/nativebridge.cc
@@ -267,11 +267,20 @@
                                          const char* app_code_cache_dir,
                                          const char* isa ATTRIBUTE_UNUSED) {
   struct stat st;
-  if ((app_code_cache_dir != nullptr)
-      && (stat(app_code_cache_dir, &st) == 0)
-      && S_ISDIR(st.st_mode)) {
-    printf("Code cache exists: '%s'.\n", app_code_cache_dir);
+  if (app_code_cache_dir != nullptr) {
+    if (stat(app_code_cache_dir, &st) == 0) {
+      if (S_ISDIR(st.st_mode)) {
+        printf("Code cache exists: '%s'.\n", app_code_cache_dir);
+      } else {
+        printf("Code cache is not a directory.\n");
+      }
+    } else {
+      perror("Error when stat-ing the code_cache:");
+    }
+  } else {
+    printf("app_code_cache_dir is null.\n");
   }
+
   if (art_cbs != nullptr) {
     gNativeBridgeArtCallbacks = art_cbs;
     printf("Native bridge initialized.\n");
diff --git a/test/123-inline-execute2/expected.txt b/test/123-inline-execute2/expected.txt
new file mode 100644
index 0000000..aa74fa3
--- /dev/null
+++ b/test/123-inline-execute2/expected.txt
@@ -0,0 +1,299 @@
+Math.sin(0.0) = 0.000000000000
+Math.sinh(0.0) = 0.000000000000
+Math.asin(0.0) = 0.000000000000
+Math.cos(0.0) = 1.000000000000
+Math.cosh(0.0) = 1.000000000000
+Math.acos(0.0) = 1.570796326795
+Math.tan(0.0) = 0.000000000000
+Math.tanh(0.0) = 0.000000000000
+Math.atan(0.0) = 0.000000000000
+Math.atan2(0.0, 1.0) = 0.000000000000
+Math.sin(0.7853981633974483) = 0.707106781187
+Math.sinh(0.7853981633974483) = 0.868670961486
+Math.asin(0.7853981633974483) = 0.903339110767
+Math.cos(0.7853981633974483) = 0.707106781187
+Math.cosh(0.7853981633974483) = 1.324609089252
+Math.acos(0.7853981633974483) = 0.667457216028
+Math.tan(0.7853981633974483) = 1.000000000000
+Math.tanh(0.7853981633974483) = 0.655794202633
+Math.atan(0.7853981633974483) = 0.665773750028
+Math.atan2(0.7853981633974483, 1.7853981633974483) = 0.414423800577
+Math.sin(1.5707963267948966) = 1.000000000000
+Math.sinh(1.5707963267948966) = 2.301298902307
+Math.asin(1.5707963267948966) = NaN
+Math.cos(1.5707963267948966) = 0.000000000000
+Math.cosh(1.5707963267948966) = 2.509178478658
+Math.acos(1.5707963267948966) = NaN
+Math.tanh(1.5707963267948966) = 0.917152335667
+Math.atan(1.5707963267948966) = 1.003884821854
+Math.atan2(1.5707963267948966, 2.5707963267948966) = 0.548479764417
+Math.sin(2.356194490192345) = 0.707106781187
+Math.sinh(2.356194490192345) = 5.227971924678
+Math.asin(2.356194490192345) = NaN
+Math.cos(2.356194490192345) = -0.707106781187
+Math.cosh(2.356194490192345) = 5.322752149520
+Math.acos(2.356194490192345) = NaN
+Math.tan(2.356194490192345) = -1.000000000000
+Math.tanh(2.356194490192345) = 0.982193380007
+Math.atan(2.356194490192345) = 1.169422824816
+Math.atan2(2.356194490192345, 3.356194490192345) = 0.612096117380
+Math.sin(3.141592653589793) = 0.000000000000
+Math.sinh(3.141592653589793) = 11.548739357258
+Math.asin(3.141592653589793) = NaN
+Math.cos(3.141592653589793) = -1.000000000000
+Math.cosh(3.141592653589793) = 11.591953275522
+Math.acos(3.141592653589793) = NaN
+Math.tan(3.141592653589793) = -0.000000000000
+Math.tanh(3.141592653589793) = 0.996272076221
+Math.atan(3.141592653589793) = 1.262627255679
+Math.atan2(3.141592653589793, 4.141592653589793) = 0.648948780815
+Math.sin(3.9269908169872414) = -0.707106781187
+Math.sinh(3.9269908169872414) = 25.367158319374
+Math.asin(3.9269908169872414) = NaN
+Math.cos(3.9269908169872414) = -0.707106781187
+Math.cosh(3.9269908169872414) = 25.386861192361
+Math.acos(3.9269908169872414) = NaN
+Math.tan(3.9269908169872414) = 1.000000000000
+Math.tanh(3.9269908169872414) = 0.999223894879
+Math.atan(3.9269908169872414) = 1.321447967784
+Math.atan2(3.9269908169872414, 4.926990816987241) = 0.672931229191
+Math.sin(4.71238898038469) = -1.000000000000
+Math.sinh(4.71238898038469) = 55.654397599418
+Math.asin(4.71238898038469) = NaN
+Math.cos(4.71238898038469) = -0.000000000000
+Math.cosh(4.71238898038469) = 55.663380890439
+Math.acos(4.71238898038469) = NaN
+Math.tanh(4.71238898038469) = 0.999838613989
+Math.atan(4.71238898038469) = 1.361691682971
+Math.atan2(4.71238898038469, 5.71238898038469) = 0.689765469251
+Math.sin(5.497787143782138) = -0.707106781187
+Math.sinh(5.497787143782138) = 122.073483514693
+Math.asin(5.497787143782138) = NaN
+Math.cos(5.497787143782138) = 0.707106781187
+Math.cosh(5.497787143782138) = 122.077579339582
+Math.acos(5.497787143782138) = NaN
+Math.tan(5.497787143782138) = -1.000000000000
+Math.tanh(5.497787143782138) = 0.999966449000
+Math.atan(5.497787143782138) = 1.390871988014
+Math.atan2(5.497787143782138, 6.497787143782138) = 0.702226398171
+Math.sin(6.283185307179586) = -0.000000000000
+Math.sinh(6.283185307179586) = 267.744894041016
+Math.asin(6.283185307179586) = NaN
+Math.cos(6.283185307179586) = 1.000000000000
+Math.cosh(6.283185307179586) = 267.746761483748
+Math.acos(6.283185307179586) = NaN
+Math.tan(6.283185307179586) = -0.000000000000
+Math.tanh(6.283185307179586) = 0.999993025340
+Math.atan(6.283185307179586) = 1.412965136507
+Math.atan2(6.283185307179586, 7.283185307179586) = 0.711819549590
+Math.cbrt(-3.0) = -1.442249570307
+Math.log(-3.0) = NaN
+Math.log10(-3.0) = NaN
+Math.log1p(-3.0) = NaN
+Math.exp(-3.0) = 0.049787068368
+Math.expm1(-3.0) = -0.950212931632
+Math.pow(-3.0, -2.0) = 0.111111111111
+Math.hypot(-3.0, -2.0) = 3.605551275464
+Math.cbrt(-2.0) = -1.259921049895
+Math.log(-2.0) = NaN
+Math.log10(-2.0) = NaN
+Math.log1p(-2.0) = NaN
+Math.exp(-2.0) = 0.135335283237
+Math.expm1(-2.0) = -0.864664716763
+Math.pow(-2.0, -1.0) = -0.500000000000
+Math.hypot(-2.0, -1.0) = 2.236067977500
+Math.cbrt(-1.0) = -1.000000000000
+Math.log(-1.0) = NaN
+Math.log10(-1.0) = NaN
+Math.log1p(-1.0) = -Infinity
+Math.exp(-1.0) = 0.367879441171
+Math.expm1(-1.0) = -0.632120558829
+Math.pow(-1.0, 0.0) = 1.000000000000
+Math.hypot(-1.0, 0.0) = 1.000000000000
+Math.cbrt(0.0) = 0.000000000000
+Math.log(0.0) = -Infinity
+Math.log10(0.0) = -Infinity
+Math.log1p(0.0) = 0.000000000000
+Math.exp(0.0) = 1.000000000000
+Math.expm1(0.0) = 0.000000000000
+Math.pow(0.0, 1.0) = 0.000000000000
+Math.hypot(0.0, 1.0) = 1.000000000000
+Math.cbrt(1.0) = 1.000000000000
+Math.log(1.0) = 0.000000000000
+Math.log10(1.0) = 0.000000000000
+Math.log1p(1.0) = 0.693147180560
+Math.exp(1.0) = 2.718281828459
+Math.expm1(1.0) = 1.718281828459
+Math.pow(1.0, 2.0) = 1.000000000000
+Math.hypot(1.0, 2.0) = 2.236067977500
+Math.cbrt(2.0) = 1.259921049895
+Math.log(2.0) = 0.693147180560
+Math.log10(2.0) = 0.301029995664
+Math.log1p(2.0) = 1.098612288668
+Math.exp(2.0) = 7.389056098931
+Math.expm1(2.0) = 6.389056098931
+Math.pow(2.0, 3.0) = 8.000000000000
+Math.hypot(2.0, 3.0) = 3.605551275464
+Math.cbrt(3.0) = 1.442249570307
+Math.log(3.0) = 1.098612288668
+Math.log10(3.0) = 0.477121254720
+Math.log1p(3.0) = 1.386294361120
+Math.exp(3.0) = 20.085536923188
+Math.expm1(3.0) = 19.085536923188
+Math.pow(3.0, 4.0) = 81.000000000000
+Math.hypot(3.0, 4.0) = 5.000000000000
+Math.ceil(0.0001) = 1.000000000000
+Math.floor(0.0001) = 0.000000000000
+Math.nextAfter(1.0, 2.0) = 1.000000000000
+Math.nextAfter(2.0, 1.0) = 2.000000000000
+Math.rint(0.5000001) = 1.000000000000
+StrictMath.sin(0.0) = 0.0
+StrictMath.sinh(0.0) = 0.0
+StrictMath.asin(0.0) = 0.0
+StrictMath.cos(0.0) = 1.0
+StrictMath.cosh(0.0) = 1.0
+StrictMath.acos(0.0) = 1.5707963267948966
+StrictMath.tan(0.0) = 0.0
+StrictMath.tanh(0.0) = 0.0
+StrictMath.atan(0.0) = 0.0
+StrictMath.atan2(0.0, 1.0) = 0.0
+StrictMath.sin(0.7853981633974483) = 0.7071067811865475
+StrictMath.sinh(0.7853981633974483) = 0.8686709614860095
+StrictMath.asin(0.7853981633974483) = 0.9033391107665127
+StrictMath.cos(0.7853981633974483) = 0.7071067811865476
+StrictMath.cosh(0.7853981633974483) = 1.3246090892520057
+StrictMath.acos(0.7853981633974483) = 0.6674572160283838
+StrictMath.tan(0.7853981633974483) = 0.9999999999999999
+StrictMath.tanh(0.7853981633974483) = 0.6557942026326724
+StrictMath.atan(0.7853981633974483) = 0.6657737500283538
+StrictMath.atan2(0.7853981633974483, 1.7853981633974483) = 0.41442380057704103
+StrictMath.sin(1.5707963267948966) = 1.0
+StrictMath.sinh(1.5707963267948966) = 2.3012989023072947
+StrictMath.asin(1.5707963267948966) = NaN
+StrictMath.cos(1.5707963267948966) = 6.123233995736766E-17
+StrictMath.cosh(1.5707963267948966) = 2.5091784786580567
+StrictMath.acos(1.5707963267948966) = NaN
+StrictMath.tan(1.5707963267948966) = 1.633123935319537E16
+StrictMath.tanh(1.5707963267948966) = 0.9171523356672744
+StrictMath.atan(1.5707963267948966) = 1.0038848218538872
+StrictMath.atan2(1.5707963267948966, 2.5707963267948966) = 0.5484797644174059
+StrictMath.sin(2.356194490192345) = 0.7071067811865476
+StrictMath.sinh(2.356194490192345) = 5.227971924677803
+StrictMath.asin(2.356194490192345) = NaN
+StrictMath.cos(2.356194490192345) = -0.7071067811865475
+StrictMath.cosh(2.356194490192345) = 5.322752149519959
+StrictMath.acos(2.356194490192345) = NaN
+StrictMath.tan(2.356194490192345) = -1.0000000000000002
+StrictMath.tanh(2.356194490192345) = 0.9821933800072388
+StrictMath.atan(2.356194490192345) = 1.1694228248157563
+StrictMath.atan2(2.356194490192345, 3.356194490192345) = 0.6120961173796371
+StrictMath.sin(3.141592653589793) = 1.2246467991473532E-16
+StrictMath.sinh(3.141592653589793) = 11.548739357257748
+StrictMath.asin(3.141592653589793) = NaN
+StrictMath.cos(3.141592653589793) = -1.0
+StrictMath.cosh(3.141592653589793) = 11.591953275521519
+StrictMath.acos(3.141592653589793) = NaN
+StrictMath.tan(3.141592653589793) = -1.2246467991473532E-16
+StrictMath.tanh(3.141592653589793) = 0.99627207622075
+StrictMath.atan(3.141592653589793) = 1.2626272556789115
+StrictMath.atan2(3.141592653589793, 4.141592653589793) = 0.6489487808147751
+StrictMath.sin(3.9269908169872414) = -0.7071067811865475
+StrictMath.sinh(3.9269908169872414) = 25.367158319374152
+StrictMath.asin(3.9269908169872414) = NaN
+StrictMath.cos(3.9269908169872414) = -0.7071067811865477
+StrictMath.cosh(3.9269908169872414) = 25.386861192360772
+StrictMath.acos(3.9269908169872414) = NaN
+StrictMath.tan(3.9269908169872414) = 0.9999999999999997
+StrictMath.tanh(3.9269908169872414) = 0.9992238948786412
+StrictMath.atan(3.9269908169872414) = 1.3214479677837223
+StrictMath.atan2(3.9269908169872414, 4.926990816987241) = 0.6729312291908799
+StrictMath.sin(4.71238898038469) = -1.0
+StrictMath.sinh(4.71238898038469) = 55.65439759941754
+StrictMath.asin(4.71238898038469) = NaN
+StrictMath.cos(4.71238898038469) = -1.8369701987210297E-16
+StrictMath.cosh(4.71238898038469) = 55.66338089043867
+StrictMath.acos(4.71238898038469) = NaN
+StrictMath.tan(4.71238898038469) = 5.443746451065123E15
+StrictMath.tanh(4.71238898038469) = 0.9998386139886326
+StrictMath.atan(4.71238898038469) = 1.3616916829711636
+StrictMath.atan2(4.71238898038469, 5.71238898038469) = 0.6897654692509959
+StrictMath.sin(5.497787143782138) = -0.7071067811865477
+StrictMath.sinh(5.497787143782138) = 122.07348351469281
+StrictMath.asin(5.497787143782138) = NaN
+StrictMath.cos(5.497787143782138) = 0.7071067811865474
+StrictMath.cosh(5.497787143782138) = 122.07757933958217
+StrictMath.acos(5.497787143782138) = NaN
+StrictMath.tan(5.497787143782138) = -1.0000000000000004
+StrictMath.tanh(5.497787143782138) = 0.9999664489997958
+StrictMath.atan(5.497787143782138) = 1.390871988014422
+StrictMath.atan2(5.497787143782138, 6.497787143782138) = 0.7022263981709682
+StrictMath.sin(6.283185307179586) = -2.4492935982947064E-16
+StrictMath.sinh(6.283185307179586) = 267.74489404101644
+StrictMath.asin(6.283185307179586) = NaN
+StrictMath.cos(6.283185307179586) = 1.0
+StrictMath.cosh(6.283185307179586) = 267.7467614837482
+StrictMath.acos(6.283185307179586) = NaN
+StrictMath.tan(6.283185307179586) = -2.4492935982947064E-16
+StrictMath.tanh(6.283185307179586) = 0.9999930253396107
+StrictMath.atan(6.283185307179586) = 1.4129651365067377
+StrictMath.atan2(6.283185307179586, 7.283185307179586) = 0.7118195495895945
+StrictMath.cbrt(-3.0) = -1.4422495703074083
+StrictMath.log(-3.0) = NaN
+StrictMath.log10(-3.0) = NaN
+StrictMath.log1p(-3.0) = NaN
+StrictMath.exp(-3.0) = 0.049787068367863944
+StrictMath.expm1(-3.0) = -0.950212931632136
+StrictMath.pow(-3.0, -2.0) = 0.1111111111111111
+StrictMath.hypot(-3.0, -2.0) = 3.605551275463989
+StrictMath.cbrt(-2.0) = -1.2599210498948732
+StrictMath.log(-2.0) = NaN
+StrictMath.log10(-2.0) = NaN
+StrictMath.log1p(-2.0) = NaN
+StrictMath.exp(-2.0) = 0.1353352832366127
+StrictMath.expm1(-2.0) = -0.8646647167633873
+StrictMath.pow(-2.0, -1.0) = -0.5
+StrictMath.hypot(-2.0, -1.0) = 2.23606797749979
+StrictMath.cbrt(-1.0) = -1.0
+StrictMath.log(-1.0) = NaN
+StrictMath.log10(-1.0) = NaN
+StrictMath.log1p(-1.0) = -Infinity
+StrictMath.exp(-1.0) = 0.36787944117144233
+StrictMath.expm1(-1.0) = -0.6321205588285577
+StrictMath.pow(-1.0, 0.0) = 1.0
+StrictMath.hypot(-1.0, 0.0) = 1.0
+StrictMath.cbrt(0.0) = 0.0
+StrictMath.log(0.0) = -Infinity
+StrictMath.log10(0.0) = -Infinity
+StrictMath.log1p(0.0) = 0.0
+StrictMath.exp(0.0) = 1.0
+StrictMath.expm1(0.0) = 0.0
+StrictMath.pow(0.0, 1.0) = 0.0
+StrictMath.hypot(0.0, 1.0) = 1.0
+StrictMath.cbrt(1.0) = 1.0
+StrictMath.log(1.0) = 0.0
+StrictMath.log10(1.0) = 0.0
+StrictMath.log1p(1.0) = 0.6931471805599453
+StrictMath.exp(1.0) = 2.7182818284590455
+StrictMath.expm1(1.0) = 1.718281828459045
+StrictMath.pow(1.0, 2.0) = 1.0
+StrictMath.hypot(1.0, 2.0) = 2.23606797749979
+StrictMath.cbrt(2.0) = 1.2599210498948732
+StrictMath.log(2.0) = 0.6931471805599453
+StrictMath.log10(2.0) = 0.3010299956639812
+StrictMath.log1p(2.0) = 1.0986122886681096
+StrictMath.exp(2.0) = 7.38905609893065
+StrictMath.expm1(2.0) = 6.38905609893065
+StrictMath.pow(2.0, 3.0) = 8.0
+StrictMath.hypot(2.0, 3.0) = 3.605551275463989
+StrictMath.cbrt(3.0) = 1.4422495703074083
+StrictMath.log(3.0) = 1.0986122886681096
+StrictMath.log10(3.0) = 0.47712125471966244
+StrictMath.log1p(3.0) = 1.3862943611198906
+StrictMath.exp(3.0) = 20.085536923187668
+StrictMath.expm1(3.0) = 19.085536923187668
+StrictMath.pow(3.0, 4.0) = 81.0
+StrictMath.hypot(3.0, 4.0) = 5.0
+StrictMath.ceil(0.0001) = 1.0
+StrictMath.floor(0.0001) = 0.0
+StrictMath.nextAfter(1.0, 2.0) = 1.0000000000000002
+StrictMath.rint(0.5000001) = 1.0
diff --git a/test/123-inline-execute2/info.txt b/test/123-inline-execute2/info.txt
new file mode 100644
index 0000000..4a728a7
--- /dev/null
+++ b/test/123-inline-execute2/info.txt
@@ -0,0 +1 @@
+Sanity checks for added InlineNative methods.
diff --git a/test/123-inline-execute2/src/Main.java b/test/123-inline-execute2/src/Main.java
new file mode 100644
index 0000000..9fadcfd
--- /dev/null
+++ b/test/123-inline-execute2/src/Main.java
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Locale;
+
+public class Main {
+  public static void main(String args[]) {
+    for (int i = 0; i <= 360; i += 45) {
+      double d = i * (Math.PI / 180.0);
+      System.out.println("Math.sin(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.sin(d)));
+
+      System.out.println("Math.sinh(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.sinh(d)));
+      System.out.println("Math.asin(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.asin(d)));
+      System.out.println("Math.cos(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.cos(d)));
+      System.out.println("Math.cosh(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.cosh(d)));
+      System.out.println("Math.acos(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.acos(d)));
+      if ((i + 90) % 180 != 0) {
+        System.out.println("Math.tan(" + d + ") = "
+            + String.format(Locale.US, "%.12f", Math.tan(d)));
+      }
+      System.out.println("Math.tanh(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.tanh(d)));
+      System.out.println("Math.atan(" + d + ") = "
+          + String.format(Locale.US, "%.12f", Math.atan(d)));
+      System.out.println("Math.atan2(" + d + ", " + (d + 1.0) + ") = "
+          + String.format(Locale.US, "%.12f", Math.atan2(d, d + 1.0)));
+    }
+
+    for (int j = -3; j <= 3; j++) {
+      double e = (double) j;
+      System.out.println("Math.cbrt(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.cbrt(e)));
+      System.out.println("Math.log(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.log(e)));
+      System.out.println("Math.log10(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.log10(e)));
+      System.out.println("Math.log1p(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.log1p(e)));
+      System.out.println("Math.exp(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.exp(e)));
+      System.out.println("Math.expm1(" + e + ") = "
+          + String.format(Locale.US, "%.12f", Math.expm1(e)));
+      System.out.println("Math.pow(" + e + ", " + (e + 1.0) + ") = "
+          + String.format(Locale.US, "%.12f", Math.pow(e, e + 1.0)));
+      System.out.println("Math.hypot(" + e + ", " + (e + 1.0) + ") = "
+          + String.format(Locale.US, "%.12f", Math.hypot(e, e + 1.0)));
+    }
+
+    System.out.println("Math.ceil(0.0001) = "
+        + String.format(Locale.US, "%.12f", Math.ceil(0.0001)));
+    System.out.println("Math.floor(0.0001) = "
+        + String.format(Locale.US, "%.12f", Math.floor(0.0001)));
+    System.out.println("Math.nextAfter(1.0, 2.0) = "
+        + String.format(Locale.US, "%.12f", Math.nextAfter(1.0, 2.0)));
+    System.out.println("Math.nextAfter(2.0, 1.0) = "
+        + String.format(Locale.US, "%.12f", Math.nextAfter(2.0, 1.0)));
+    System.out.println("Math.rint(0.5000001) = "
+        + String.format(Locale.US, "%.12f", Math.rint(0.5000001)));
+
+    for (int i = 0; i <= 360; i += 45) {
+      double d = i * (StrictMath.PI / 180.0);
+      System.out.println("StrictMath.sin(" + d + ") = " + StrictMath.sin(d));
+      System.out.println("StrictMath.sinh(" + d + ") = " + StrictMath.sinh(d));
+      System.out.println("StrictMath.asin(" + d + ") = " + StrictMath.asin(d));
+      System.out.println("StrictMath.cos(" + d + ") = " + StrictMath.cos(d));
+      System.out.println("StrictMath.cosh(" + d + ") = " + StrictMath.cosh(d));
+      System.out.println("StrictMath.acos(" + d + ") = " + StrictMath.acos(d));
+      System.out.println("StrictMath.tan(" + d + ") = " + StrictMath.tan(d));
+      System.out.println("StrictMath.tanh(" + d + ") = " + StrictMath.tanh(d));
+      System.out.println("StrictMath.atan(" + d + ") = " + StrictMath.atan(d));
+      System.out.println("StrictMath.atan2(" + d + ", " + (d + 1.0) + ") = "
+          + StrictMath.atan2(d, d + 1.0));
+    }
+
+    for (int j = -3; j <= 3; j++) {
+      double e = (double) j;
+      System.out.println("StrictMath.cbrt(" + e + ") = " + StrictMath.cbrt(e));
+      System.out.println("StrictMath.log(" + e + ") = " + StrictMath.log(e));
+      System.out.println("StrictMath.log10(" + e + ") = " + StrictMath.log10(e));
+      System.out.println("StrictMath.log1p(" + e + ") = " + StrictMath.log1p(e));
+      System.out.println("StrictMath.exp(" + e + ") = " + StrictMath.exp(e));
+      System.out.println("StrictMath.expm1(" + e + ") = " + StrictMath.expm1(e));
+      System.out.println("StrictMath.pow(" + e + ", " + (e + 1.0) + ") = "
+          + StrictMath.pow(e, e + 1.0));
+      System.out.println("StrictMath.hypot(" + e + ", " + (e + 1.0) + ") = "
+          + StrictMath.hypot(e, e + 1.0));
+    }
+
+    System.out.println("StrictMath.ceil(0.0001) = " + StrictMath.ceil(0.0001));
+    System.out.println("StrictMath.floor(0.0001) = " + StrictMath.floor(0.0001));
+    System.out.println("StrictMath.nextAfter(1.0, 2.0) = " + StrictMath.nextAfter(1.0, 2.0));
+    System.out.println("StrictMath.rint(0.5000001) = " + StrictMath.rint(0.5000001));
+  }
+
+}
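
Note: the (i + 90) % 180 != 0 guard skips Math.tan at 90 and 270 degrees. The
double closest to pi/2 is not exactly pi/2, so the tangent there is a huge
finite value (StrictMath prints 1.633123935319537E16 in expected.txt), and
Math.tan is permitted to differ from StrictMath.tan in the low bits; omitting
that line keeps expected.txt stable across implementations. Demonstration:

    double d = 90 * (Math.PI / 180.0);       // close to, but not exactly, pi/2
    System.out.println(StrictMath.tan(d));   // 1.633123935319537E16, not Infinity
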
diff --git a/test/137-cfi/src/Main.java b/test/137-cfi/src/Main.java
index dc3ef7e..5474c9b 100644
--- a/test/137-cfi/src/Main.java
+++ b/test/137-cfi/src/Main.java
@@ -117,7 +117,7 @@
       // Could do reflection for the private pid field, but String parsing is easier.
       String s = p.toString();
       if (s.startsWith("Process[pid=")) {
-          return Integer.parseInt(s.substring("Process[pid=".length(), s.length() - 1));
+          return Integer.parseInt(s.substring("Process[pid=".length(), s.indexOf(",")));
       } else {
           return -1;
       }
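
Note: cutting at the first comma assumes the newer Process.toString() layout
with extra fields after the pid (something like "Process[pid=123,
hasExited=false]" -- the suffix shown here is an assumption for illustration).
A sketch tolerating both the old "Process[pid=123]" form and the newer one:

    // Hypothetical helper; the exact text after the comma does not matter.
    static int parsePid(String s) {
        if (!s.startsWith("Process[pid=")) {
            return -1;
        }
        int start = "Process[pid=".length();
        int end = s.indexOf(',');
        if (end < 0) {
            end = s.length() - 1;  // old format: strip only the trailing ']'
        }
        return Integer.parseInt(s.substring(start, end));
    }
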
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index ffeae7d..c3d2759 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -652,20 +652,19 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo1(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i < end; i++) {
@@ -685,27 +684,25 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-
   /// CHECK-START: void Main.foo2(int[], int, int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo2(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i <= end; i++) {
@@ -725,25 +722,25 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-
   /// CHECK-START: void Main.foo3(int[], int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo3(int[] array, int end, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for end < array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null check
     // and array.length out of loop).
     for (int i = 3 ; i <= end; i++) {
@@ -770,18 +767,19 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo4(int[] array, int end, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null check
     // and array.length out of loop).
     for (int i = end ; i > 0; i--) {
@@ -816,14 +814,18 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  //  array.length is defined before the loop header so no phi is needed.
-  /// CHECK-NOT: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo5(int[] array, int end, boolean expectInterpreter) {
@@ -831,8 +833,8 @@
     for (int i = array.length - 1 ; i >= 0; i--) {
       array[i] = 1;
     }
-    // One HDeoptimize will be added.
-    // It's for (end - 2 <= array.length - 2).
+    // Several HDeoptimize will be added. Two for each index.
+    // The null check is not necessary.
     for (int i = end - 2 ; i > 0; i--) {
       if (expectInterpreter) {
         assertIsInterpreted();
@@ -859,7 +861,6 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-
   /// CHECK-START: void Main.foo6(int[], int, int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
@@ -874,23 +875,27 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
   /// CHECK: Goto
-  /// CHECK-NOT: Deoptimize
+  /// CHECK: Goto
 
   void foo6(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 2, one for end <= array.length - 3,
-    // and one for null check on array (to hoist null
-    // check and array.length out of loop).
+    // Several HDeoptimize will be added.
     for (int i = end; i >= start; i--) {
       if (expectInterpreter) {
         assertIsInterpreted();
@@ -914,20 +919,19 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo7(int[] array, int start, int end, boolean lowEnd) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i < end; i++) {
@@ -955,26 +959,28 @@
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
+  /// CHECK: Goto
+  /// CHECK: Goto
+  /// CHECK: If
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo8(int[][] matrix, int start, int end) {
-    // Three HDeoptimize will be added for the outer loop.
-    // start >= 0, end <= matrix.length, and null check on matrix.
-    // Three HDeoptimize will be added for the inner loop
-    // start >= 0 (TODO: this may be optimized away),
-    // end <= row.length, and null check on row.
+    // Three HDeoptimize will be added for the outer loop:
+    // two for the index and one for the null check on
+    // matrix. Same for the inner loop.
     for (int i = start; i < end; i++) {
       int[] row = matrix[i];
       for (int j = start; j < end; j++) {
@@ -994,15 +1000,22 @@
   //  loop for loop body entry test.
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Phi
   /// CHECK-NOT: NullCheck
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
 
+  /// CHECK-START: void Main.foo9(int[], boolean) instruction_simplifier_after_bce (after)
+  //  Simplification removes the redundant check
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: Deoptimize
+
   void foo9(int[] array, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for
-    // 10 <= array.length, and one for null check on array.
+    // Three HDeoptimize will be added. Two for the index
+    // and one for null check on array.
     for (int i = 0 ; i < 10; i++) {
       if (expectInterpreter) {
         assertIsInterpreted();
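
Note on the checker changes in this file: BCE with deoptimization hoists the
per-iteration NullCheck/BoundsCheck out of the loop into guard blocks that are
now emitted at the end of the graph (after Exit, hence the added CHECK: Exit),
and each guard transfers to the interpreter via HDeoptimize when its condition
fails. The net effect on a loop like foo1, sketched in source form (the guard
expressions are schematic, not the compiler's exact conditions):

    class BceSketch {
        // Stand-in for the runtime's transfer back to the interpreter.
        static void deoptimize() { throw new AssertionError("deoptimize"); }

        static void foo1Transformed(int[] array, int start, int end) {
            // Hoisted guards: one null check plus two index-range checks.
            if (array == null) deoptimize();
            if (start < 0) deoptimize();
            if (end > array.length) deoptimize();
            for (int i = start; i < end; i++) {
                array[i] = 1;  // body runs with no per-iteration checks
            }
        }
    }
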
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index ec63057..f1f80ca 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -618,6 +618,57 @@
     getSuper();
   }
 
+  /// CHECK-START: void Main.testLoopPhiWithNullFirstInput(boolean) reference_type_propagation (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<Main:l\d+>>      NewInstance klass:Main exact:true
+  /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<LoopPhi>>,<<Main>>] klass:Main exact:true
+  private void testLoopPhiWithNullFirstInput(boolean cond) {
+    Main a = null;
+    while (a == null) {
+      if (cond) {
+        a = new Main();
+      }
+    }
+  }
+
+  /// CHECK-START: void Main.testLoopPhisWithNullAndCrossUses(boolean) reference_type_propagation (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<PhiA:l\d+>>      Phi [<<Null>>,<<PhiB:l\d+>>,<<PhiA>>] klass:java.lang.Object exact:false
+  /// CHECK-DAG:  <<PhiB>>           Phi [<<Null>>,<<PhiB>>,<<PhiA>>] klass:java.lang.Object exact:false
+  private void testLoopPhisWithNullAndCrossUses(boolean cond) {
+    Main a = null;
+    Main b = null;
+    while (a == null) {
+      if (cond) {
+        a = b;
+      } else {
+        b = a;
+      }
+    }
+  }
+
+  /// CHECK-START: java.lang.Object[] Main.testInstructionsWithUntypedParent() reference_type_propagation (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<Phi:l\d+>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<Array:l\d+>>     NewArray klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<Phi>>            Phi [<<Array>>,<<LoopPhi>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<NC:l\d+>>        NullCheck [<<LoopPhi>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:                     ArrayGet [<<NC>>,{{i\d+}}] klass:java.lang.Object exact:false
+  private Object[] testInstructionsWithUntypedParent() {
+    Object[] array = null;
+    boolean cond = true;
+    for (int i = 0; i < 10; ++i) {
+      if (cond) {
+        array = new Object[10];
+        array[0] = new Object();
+        cond = false;
+      } else {
+        array[i] = array[0];
+      }
+    }
+    return array;
+  }
+
   public static void main(String[] args) {
   }
 }
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index c32d34a..6151fc1 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -389,24 +389,6 @@
     return arg << 0;
   }
 
-  /// CHECK-START: int Main.Shl1(int) instruction_simplifier (before)
-  /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
-  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
-  /// CHECK-DAG:     <<Shl:i\d+>>      Shl [<<Arg>>,<<Const1>>]
-  /// CHECK-DAG:                       Return [<<Shl>>]
-
-  /// CHECK-START: int Main.Shl1(int) instruction_simplifier (after)
-  /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
-  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg>>,<<Arg>>]
-  /// CHECK-DAG:                       Return [<<Add>>]
-
-  /// CHECK-START: int Main.Shl1(int) instruction_simplifier (after)
-  /// CHECK-NOT:                       Shl
-
-  public static int Shl1(int arg) {
-    return arg << 1;
-  }
-
   /// CHECK-START: long Main.Shr0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
@@ -1226,6 +1208,130 @@
     return arg / -0.25f;
   }
 
+  /**
+   * Test strength reduction of factors of the form (2^n + 1).
+   */
+
+  /// CHECK-START: int Main.mulPow2Plus1(int) instruction_simplifier (before)
+  /// CHECK-DAG:   <<Arg:i\d+>>         ParameterValue
+  /// CHECK-DAG:   <<Const9:i\d+>>      IntConstant 9
+  /// CHECK:                            Mul [<<Arg>>,<<Const9>>]
+
+  /// CHECK-START: int Main.mulPow2Plus1(int) instruction_simplifier (after)
+  /// CHECK-DAG:   <<Arg:i\d+>>         ParameterValue
+  /// CHECK-DAG:   <<Const3:i\d+>>      IntConstant 3
+  /// CHECK:       <<Shift:i\d+>>       Shl [<<Arg>>,<<Const3>>]
+  /// CHECK-NEXT:                       Add [<<Arg>>,<<Shift>>]
+
+  public static int mulPow2Plus1(int arg) {
+    return arg * 9;
+  }
+
+  /**
+   * Test strength reduction of factors of the form (2^n - 1).
+   */
+
+  /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (before)
+  /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
+  /// CHECK-DAG:   <<Const31:j\d+>>     LongConstant 31
+  /// CHECK:                            Mul [<<Arg>>,<<Const31>>]
+
+  /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (after)
+  /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
+  /// CHECK-DAG:   <<Const5:i\d+>>      IntConstant 5
+  /// CHECK:       <<Shift:j\d+>>       Shl [<<Arg>>,<<Const5>>]
+  /// CHECK-NEXT:                       Sub [<<Shift>>,<<Arg>>]
+
+  public static long mulPow2Minus1(long arg) {
+    return arg * 31;
+  }
+
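For concreteness, the rewrites these two tests pin down (a standalone Java illustration, not part of the test file):

    static int times9(int x) { return (x << 3) + x; }    // x * 9,  since 9  = 2^3 + 1
    static long times31(long y) { return (y << 5) - y; } // y * 31, since 31 = 2^5 - 1

Each replaces a multiply with a shift plus an add or subtract, matching the Shl/Add and Shl/Sub pairs the (after) graphs expect.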
+  /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier (before)
+  /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<NE:z\d+>>       NotEqual [<<Field>>,<<Const1>>]
+  /// CHECK-DAG:                        If [<<NE>>]
+
+  /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier (after)
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<Not:z\d+>>      BooleanNot [<<Field>>]
+  /// CHECK-DAG:                        If [<<Not>>]
+
+  public static int booleanFieldNotEqualOne() {
+    return (booleanField == true) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier (before)
+  /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<EQ:z\d+>>       Equal [<<Field>>,<<Const0>>]
+  /// CHECK-DAG:                        If [<<EQ>>]
+
+  /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier (after)
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<Not:z\d+>>      BooleanNot [<<Field>>]
+  /// CHECK-DAG:                        If [<<Not>>]
+
+  public static int booleanFieldEqualZero() {
+    return (booleanField != false) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:      <<GT:z\d+>>       GreaterThan [<<Arg>>,<<Const42>>]
+  /// CHECK-DAG:      <<NE:z\d+>>       NotEqual [<<GT>>,<<Const1>>]
+  /// CHECK-DAG:                        If [<<NE>>]
+
+  /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:                        If [<<LE:z\d+>>]
+  /// CHECK-DAG:      <<LE>>            LessThanOrEqual [<<Arg>>,<<Const42>>]
+  // Note that we match `LE` from If because there are two identical LessThanOrEqual instructions.
+
+  public static int intConditionNotEqualOne(int i) {
+    return ((i > 42) == true) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:      <<GT:z\d+>>       GreaterThan [<<Arg>>,<<Const42>>]
+  /// CHECK-DAG:      <<EQ:z\d+>>       Equal [<<GT>>,<<Const0>>]
+  /// CHECK-DAG:                        If [<<EQ>>]
+
+  /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:                        If [<<LE:z\d+>>]
+  /// CHECK-DAG:      <<LE>>            LessThanOrEqual [<<Arg>>,<<Const42>>]
+  // Note that we match `LE` from If because there are two identical LessThanOrEqual instructions.
+
+  public static int intConditionEqualZero(int i) {
+    return ((i > 42) != false) ? 13 : 54;
+  }
+
+  // Test that conditions on float/double are not flipped.
+
+  /// CHECK-START: int Main.floatConditionNotEqualOne(float) register (before)
+  /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:                        NotEqual [{{i\d+}},<<Const1>>]
+
+  public static int floatConditionNotEqualOne(float f) {
+    return ((f > 42.0f) == true) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.doubleConditionEqualZero(double) register (before)
+  /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:                        Equal [{{i\d+}},<<Const0>>]
+
+  public static int doubleConditionEqualZero(double d) {
+    return ((d > 42.0) != false) ? 13 : 54;
+  }
+
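Why the two float/double variants above must not be flipped: with a NaN operand every ordered comparison evaluates to false, so negating a floating-point comparison is not equivalent to reversing its direction. A standalone demonstration:

    float f = Float.NaN;
    System.out.println(f > 42.0f);   // false
    System.out.println(f <= 42.0f);  // also false: !(f > c) and (f <= c) disagree on NaN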
   public static void main(String[] args) {
     int arg = 123456;
 
@@ -1274,7 +1380,6 @@
     assertDoubleEquals(Div2(150.0), 75.0);
     assertFloatEquals(DivMP25(100.0f), -400.0f);
     assertDoubleEquals(DivMP25(150.0), -600.0);
-    assertLongEquals(Shl1(100), 200);
     assertIntEquals(UShr28And15(0xc1234567), 0xc);
     assertLongEquals(UShr60And15(0xc123456787654321L), 0xcL);
     assertIntEquals(UShr28And7(0xc1234567), 0x4);
@@ -1283,5 +1388,32 @@
     assertLongEquals(Shr56And255(0xc123456787654321L), 0xc1L);
     assertIntEquals(Shr24And127(0xc1234567), 0x41);
     assertLongEquals(Shr56And127(0xc123456787654321L), 0x41L);
+    assertIntEquals(0, mulPow2Plus1(0));
+    assertIntEquals(9, mulPow2Plus1(1));
+    assertIntEquals(18, mulPow2Plus1(2));
+    assertIntEquals(900, mulPow2Plus1(100));
+    assertIntEquals(111105, mulPow2Plus1(12345));
+    assertLongEquals(0, mulPow2Minus1(0));
+    assertLongEquals(31, mulPow2Minus1(1));
+    assertLongEquals(62, mulPow2Minus1(2));
+    assertLongEquals(3100, mulPow2Minus1(100));
+    assertLongEquals(382695, mulPow2Minus1(12345));
+
+    booleanField = false;
+    assertIntEquals(booleanFieldNotEqualOne(), 54);
+    assertIntEquals(booleanFieldEqualZero(), 54);
+    booleanField = true;
+    assertIntEquals(booleanFieldNotEqualOne(), 13);
+    assertIntEquals(booleanFieldEqualZero(), 13);
+    assertIntEquals(intConditionNotEqualOne(6), 54);
+    assertIntEquals(intConditionNotEqualOne(43), 13);
+    assertIntEquals(intConditionEqualZero(6), 54);
+    assertIntEquals(intConditionEqualZero(43), 13);
+    assertIntEquals(floatConditionNotEqualOne(6.0f), 54);
+    assertIntEquals(floatConditionNotEqualOne(43.0f), 13);
+    assertIntEquals(doubleConditionEqualZero(6.0), 54);
+    assertIntEquals(doubleConditionEqualZero(43.0), 13);
   }
+
+  public static boolean booleanField;
 }
diff --git a/test/464-checker-inline-sharpen-calls/src/Main.java b/test/464-checker-inline-sharpen-calls/src/Main.java
index 6dce96c..5080f142 100644
--- a/test/464-checker-inline-sharpen-calls/src/Main.java
+++ b/test/464-checker-inline-sharpen-calls/src/Main.java
@@ -19,23 +19,25 @@
   public void invokeVirtual() {
   }
 
-  /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) inliner (before)
-  /// CHECK-DAG:     <<Invoke:v\d+>>  InvokeStaticOrDirect
+  /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) builder (after)
+  /// CHECK-DAG:     <<Invoke:v\d+>>  InvokeVirtual
   /// CHECK-DAG:                      ReturnVoid
 
   /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) inliner (after)
+  /// CHECK-NOT:                      InvokeVirtual
   /// CHECK-NOT:                      InvokeStaticOrDirect
 
   public static void inlineSharpenInvokeVirtual(Main m) {
     m.invokeVirtual();
   }
 
-  /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (before)
-  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  /// CHECK-START: int Main.inlineSharpenStringInvoke() ssa_builder (after)
+  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeVirtual
   /// CHECK-DAG:                      Return [<<Invoke>>]
 
   /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      InvokeVirtual
 
   /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (after)
   /// CHECK-DAG:     <<Field:i\d+>>   InstanceFieldGet
diff --git a/test/485-checker-dce-loop-update/smali/TestCase.smali b/test/485-checker-dce-loop-update/smali/TestCase.smali
index ab4afdb..1de0bae 100644
--- a/test/485-checker-dce-loop-update/smali/TestCase.smali
+++ b/test/485-checker-dce-loop-update/smali/TestCase.smali
@@ -136,11 +136,11 @@
 ## CHECK-DAG:     <<Cst1:i\d+>>  IntConstant 1
 ## CHECK-DAG:     <<Cst5:i\d+>>  IntConstant 5
 ## CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
-## CHECK-DAG:     <<Cst9:i\d+>>  IntConstant 9
+## CHECK-DAG:     <<Cst11:i\d+>> IntConstant 11
 ## CHECK-DAG:     <<PhiX1:i\d+>> Phi [<<ArgX>>,<<Add5:i\d+>>,<<Add7:i\d+>>] loop:<<HeaderY:B\d+>>
 ## CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
 ## CHECK-DAG:                    If [<<ArgZ>>]                              loop:<<HeaderY>>
-## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst9>>]                   loop:<<HeaderY>>
+## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst11>>]                  loop:<<HeaderY>>
 ## CHECK-DAG:     <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>]                   loop:<<HeaderY>>
 ## CHECK-DAG:                    If [<<Cst1>>]                              loop:<<HeaderY>>
 ## CHECK-DAG:     <<Add5>>       Add [<<PhiX2>>,<<Cst5>>]                   loop:<<HeaderY>>
@@ -152,12 +152,12 @@
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
-## CHECK-DAG:     <<Cst9:i\d+>>  IntConstant 9
+## CHECK-DAG:     <<Cst11:i\d+>> IntConstant 11
 ## CHECK-DAG:     <<PhiX1:i\d+>> Phi [<<ArgX>>,<<Add7:i\d+>>]               loop:<<HeaderY:B\d+>>
 ## CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX1>>,<<Cst7>>]                   loop:<<HeaderY>>
 ## CHECK-DAG:                    If [<<ArgZ>>]                              loop:none
-## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst9>>]                   loop:none
+## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst11>>]                  loop:none
 ## CHECK-DAG:     <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>]                   loop:none
 ## CHECK-DAG:                    Return [<<PhiX2>>]                         loop:none
 
@@ -177,7 +177,7 @@
 
   # Additional logic which will end up outside the loop
   if-eqz p2, :skip_if
-  mul-int/lit8 p0, p0, 9
+  mul-int/lit8 p0, p0, 11
   :skip_if
 
   if-nez v0, :loop_end    # will always take the branch
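(Likely rationale for the 9 -> 11 constant change, inferred rather than stated by the patch: 9 = 2^3 + 1, so the new strength reduction tested in 458-checker-instruction-simplification would rewrite the multiply by 9 into a shift and an add, deleting the Mul these CHECK lines depend on; 11 is not of the form 2^n +/- 1, so the Mul survives.)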
diff --git a/test/488-checker-inline-recursive-calls/src/Main.java b/test/488-checker-inline-recursive-calls/src/Main.java
index c1f25b3..87ff3f7 100644
--- a/test/488-checker-inline-recursive-calls/src/Main.java
+++ b/test/488-checker-inline-recursive-calls/src/Main.java
@@ -25,10 +25,10 @@
   }
 
   /// CHECK-START: void Main.doTopCall(boolean) inliner (before)
-  /// CHECK-NOT:   InvokeStaticOrDirect recursive:true
+  /// CHECK-NOT:   InvokeStaticOrDirect method_load_kind:recursive
 
   /// CHECK-START: void Main.doTopCall(boolean) inliner (after)
-  /// CHECK:       InvokeStaticOrDirect recursive:true
+  /// CHECK:       InvokeStaticOrDirect method_load_kind:recursive
   public static void doTopCall(boolean first_call) {
     if (first_call) {
       inline1();
diff --git a/test/492-checker-inline-invoke-interface/expected.txt b/test/492-checker-inline-invoke-interface/expected.txt
index b0014d7..42b331f 100644
--- a/test/492-checker-inline-invoke-interface/expected.txt
+++ b/test/492-checker-inline-invoke-interface/expected.txt
@@ -2,4 +2,4 @@
 java.lang.Exception
 	at ForceStatic.<clinit>(Main.java:24)
 	at Main.$inline$foo(Main.java:31)
-	at Main.main(Main.java:48)
+	at Main.main(Main.java:50)
diff --git a/test/492-checker-inline-invoke-interface/src/Main.java b/test/492-checker-inline-invoke-interface/src/Main.java
index 9a45485..a8b6307 100644
--- a/test/492-checker-inline-invoke-interface/src/Main.java
+++ b/test/492-checker-inline-invoke-interface/src/Main.java
@@ -31,15 +31,17 @@
     int a = ForceStatic.field;
   }
 
-  /// CHECK-START: void Main.main(java.lang.String[]) inliner (before)
+  /// CHECK-START: void Main.main(java.lang.String[]) ssa_builder (after)
   /// CHECK:           InvokeStaticOrDirect
-  /// CHECK:           InvokeStaticOrDirect
+  /// CHECK:           InvokeInterface
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (before)
   /// CHECK-NOT:       ClinitCheck
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (after)
   /// CHECK-NOT:       InvokeStaticOrDirect
+  /// CHECK-NOT:       InvokeVirtual
+  /// CHECK-NOT:       InvokeInterface
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (after)
   /// CHECK:           ClinitCheck
diff --git a/test/530-checker-loops/src/Main.java b/test/530-checker-loops/src/Main.java
index 58c92f1..e827b1e 100644
--- a/test/530-checker-loops/src/Main.java
+++ b/test/530-checker-loops/src/Main.java
@@ -27,8 +27,10 @@
 
   /// CHECK-START: int Main.linear(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linear(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; i++) {
@@ -39,8 +41,10 @@
 
   /// CHECK-START: int Main.linearDown(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearDown(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearDown(int[] x) {
     int result = 0;
     for (int i = x.length - 1; i >= 0; i--) {
@@ -51,8 +55,10 @@
 
   /// CHECK-START: int Main.linearObscure(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearObscure(int[] x) {
     int result = 0;
     for (int i = x.length - 1; i >= 0; i--) {
@@ -64,8 +70,10 @@
 
   /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearVeryObscure(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; i++) {
@@ -75,10 +83,29 @@
     return result;
   }
 
+  /// CHECK-START: int Main.hiddenStride(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.hiddenStride(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int hiddenStride(int[] a) {
+    int result = 0;
+    for (int i = 1; i <= 1; i++) {
+      // Obscured unit stride.
+      for (int j = 0; j < a.length; j += i) {
+        result += a[j];
+      }
+    }
+    return result;
+  }
+
   /// CHECK-START: int Main.linearWhile(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearWhile(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWhile(int[] x) {
     int i = 0;
     int result = 0;
@@ -90,8 +117,10 @@
 
   /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearThreeWayPhi(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; ) {
@@ -106,8 +135,10 @@
 
   /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearFourWayPhi(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; ) {
@@ -126,8 +157,10 @@
 
   /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int wrapAroundThenLinear(int[] x) {
     // Loop with wrap around (length - 1, 0, 1, 2, ..).
     int w = x.length - 1;
@@ -141,8 +174,10 @@
 
   /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int wrapAroundThenLinearThreeWayPhi(int[] x) {
     // Loop with wrap around (length - 1, 0, 1, 2, ..).
     int w = x.length - 1;
@@ -160,8 +195,10 @@
 
   /// CHECK-START: int[] Main.linearWithParameter(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int[] Main.linearWithParameter(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int[] linearWithParameter(int n) {
     int[] x = new int[n];
     for (int i = 0; i < n; i++) {
@@ -172,8 +209,10 @@
 
   /// CHECK-START: int[] Main.linearCopy(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int[] Main.linearCopy(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int[] linearCopy(int x[]) {
     int n = x.length;
     int y[] = new int[n];
@@ -183,10 +222,59 @@
     return y;
   }
 
+  /// CHECK-START: int Main.linearByTwo(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearByTwo(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwo(int x[]) {
+    int n = x.length / 2;
+    int result = 0;
+    for (int i = 0; i < n; i++) {
+      int ii = i << 1;
+      result += x[ii];
+      result += x[ii + 1];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwoSkip1(int x[]) {
+    int result = 0;
+    for (int i = 0; i < x.length / 2; i++) {
+      result += x[2 * i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwoSkip2(int x[]) {
+    int result = 0;
+    // This case is not optimized.
+    for (int i = 0; i < x.length; i+=2) {
+      result += x[i];
+    }
+    return result;
+  }
+
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithCompoundStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
     int result = 0;
@@ -200,8 +288,10 @@
 
   /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithLargePositiveStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -216,8 +306,10 @@
 
   /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithVeryLargePositiveStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -232,8 +324,10 @@
 
   /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithLargeNegativeStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -248,8 +342,10 @@
 
   /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithVeryLargeNegativeStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -264,8 +360,10 @@
 
   /// CHECK-START: int Main.linearForNEUp() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearForNEUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearForNEUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -277,8 +375,10 @@
 
   /// CHECK-START: int Main.linearForNEDown() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearForNEDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearForNEDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -290,8 +390,10 @@
 
   /// CHECK-START: int Main.linearDoWhileUp() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearDoWhileUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearDoWhileUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -304,8 +406,10 @@
 
   /// CHECK-START: int Main.linearDoWhileDown() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearDoWhileDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearDoWhileDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -318,8 +422,10 @@
 
   /// CHECK-START: int Main.linearShort() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearShort() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearShort() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -330,10 +436,221 @@
     return result;
   }
 
+  /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int invariantFromPreLoop(int[] x, int y) {
+    int result = 0;
+    // Strange pre-loop that sets the upper bound.
+    int hi;
+    while (true) {
+      y = y % 3;
+      hi = x.length;
+      if (y != 123) break;
+    }
+    for (int i = 0; i < hi; i++) {
+       result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  //
+  /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularOnTwoArrayLengths(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < a.length; i++) {
+      int[] b = new int[i];
+      for (int j = 0; j < b.length; j++) {
+        // Need to know j < b.length < a.length for static bce.
+        a[j] += 1;
+        // Need to know just j < b.length for static bce.
+        b[j] += 1;
+      }
+      verifyTriangular(a, b, i, n);
+    }
+  }
+
+  /// CHECK-START: void Main.linearTriangularOnOneArrayLength(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  //
+  /// CHECK-START: void Main.linearTriangularOnOneArrayLength(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularOnOneArrayLength(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < a.length; i++) {
+      int[] b = new int[i];
+      for (int j = 0; j < i; j++) {
+        // Need to know j < i < a.length for static bce.
+        a[j] += 1;
+        // Need to know just j < i for static bce.
+        b[j] += 1;
+      }
+      verifyTriangular(a, b, i, n);
+    }
+  }
+
+  /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  //
+  /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularOnParameter(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < n; i++) {
+      int[] b = new int[i];
+      for (int j = 0; j < i; j++) {
+        // Need to know j < i < n for static bce.
+        a[j] += 1;
+        // Need to know just j < i for static bce.
+        b[j] += 1;
+      }
+      verifyTriangular(a, b, i, n);
+    }
+  }
+
+  /// CHECK-START: void Main.linearTriangularVariations(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  //
+  /// CHECK-START: void Main.linearTriangularVariations(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularVariations(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < n; i++) {
+      for (int j = 0; j < i; j++) {
+        a[j] += 1;
+      }
+      for (int j = i - 1; j >= 0; j--) {
+        a[j] += 1;
+      }
+      for (int j = i; j < n; j++) {
+        a[j] += 1;
+      }
+      for (int j = n - 1; j > i - 1; j--) {
+        a[j] += 1;
+      }
+    }
+    verifyTriangular(a);
+  }
+
+  // Verifier for triangular loops.
+  private static void verifyTriangular(int[] a, int[] b, int m, int n) {
+    expectEquals(n, a.length);
+    for (int i = 0, k = m; i < n; i++) {
+      expectEquals(a[i], k);
+      if (k > 0) k--;
+    }
+    expectEquals(m, b.length);
+    for (int i = 0; i < m; i++) {
+      expectEquals(b[i], 1);
+    }
+  }
+
+  // Verifier for triangular loops.
+  private static void verifyTriangular(int[] a) {
+    int n = a.length;
+    for (int i = 0; i < n; i++) {
+      expectEquals(a[i], n + n);
+    }
+  }
+
+  /// CHECK-START: void Main.bubble(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: ArraySet
+  //
+  /// CHECK-START: void Main.bubble(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void bubble(int[] a) {
+    for (int i = a.length; --i >= 0;) {
+      for (int j = 0; j < i; j++) {
+        if (a[j] > a[j+1]) {
+          int tmp = a[j];
+          a[j]  = a[j+1];
+          a[j+1] = tmp;
+        }
+      }
+    }
+  }
+
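Bounds reasoning behind the bubble expectations above: in the inner loop j < i and i <= a.length - 1, hence j + 1 <= a.length - 1, so both a[j] and a[j+1] are statically in range and BCE can drop every check while keeping the compare-and-swap.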
   /// CHECK-START: int Main.periodicIdiom(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.periodicIdiom(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int periodicIdiom(int tc) {
     int[] x = { 1, 3 };
     // Loop with periodic sequence (0, 1).
@@ -348,8 +665,10 @@
 
   /// CHECK-START: int Main.periodicSequence2(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.periodicSequence2(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int periodicSequence2(int tc) {
     int[] x = { 1, 3 };
     // Loop with periodic sequence (0, 1).
@@ -370,8 +689,10 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.periodicSequence4(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int periodicSequence4(int tc) {
     int[] x = { 1, 3, 5, 7 };
     // Loop with periodic sequence (0, 1, 2, 3).
@@ -393,8 +714,10 @@
 
   /// CHECK-START: int Main.justRightUp1() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.justRightUp1() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightUp1() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -406,8 +729,10 @@
 
   /// CHECK-START: int Main.justRightUp2() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.justRightUp2() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightUp2() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -419,8 +744,10 @@
 
   /// CHECK-START: int Main.justRightUp3() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.justRightUp3() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightUp3() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -432,8 +759,10 @@
 
   /// CHECK-START: int Main.justOOBUp() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.justOOBUp() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justOOBUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -446,8 +775,10 @@
 
   /// CHECK-START: int Main.justRightDown1() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.justRightDown1() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightDown1() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -459,8 +790,10 @@
 
   /// CHECK-START: int Main.justRightDown2() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.justRightDown2() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightDown2() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -472,8 +805,10 @@
 
   /// CHECK-START: int Main.justRightDown3() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.justRightDown3() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightDown3() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -485,8 +820,10 @@
 
   /// CHECK-START: int Main.justOOBDown() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.justOOBDown() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justOOBDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -499,8 +836,10 @@
 
   /// CHECK-START: void Main.lowerOOB(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: void Main.lowerOOB(int[]) BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void lowerOOB(int[] x) {
     for (int i = -1; i < x.length; i++) {
       sResult += x[i];
@@ -509,8 +848,10 @@
 
   /// CHECK-START: void Main.upperOOB(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: void Main.upperOOB(int[]) BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void upperOOB(int[] x) {
     for (int i = 0; i <= x.length; i++) {
       sResult += x[i];
@@ -519,8 +860,10 @@
 
   /// CHECK-START: void Main.doWhileUpOOB() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: void Main.doWhileUpOOB() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void doWhileUpOOB() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int i = 0;
@@ -531,8 +874,10 @@
 
   /// CHECK-START: void Main.doWhileDownOOB() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: void Main.doWhileDownOOB() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void doWhileDownOOB() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int i = x.length - 1;
@@ -541,6 +886,364 @@
     } while (-1 <= i);
   }
 
+  /// CHECK-START: int[] Main.multiply1() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  //
+  /// CHECK-START: int[] Main.multiply1() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static int[] multiply1() {
+    int[] a = new int[10];
+    try {
+      for (int i = 0; i <= 3; i++) {
+        for (int j = 0; j <= 3; j++) {
+          // Range [0,9]: safe.
+          a[i * j] += 1;
+        }
+      }
+    } catch (Exception e) {
+      a[0] += 1000;
+    }
+    return a;
+  }
+
+  /// CHECK-START: int[] Main.multiply2() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  //
+  /// CHECK-START: int[] Main.multiply2() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  static int[] multiply2() {
+    int[] a = new int[10];
+    try {
+      for (int i = -3; i <= 3; i++) {
+        for (int j = -3; j <= 3; j++) {
+          // Range [-9,9]: unsafe.
+          a[i * j] += 1;
+        }
+      }
+    } catch (Exception e) {
+      a[0] += 1000;
+    }
+    return a;
+  }
+
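Range arithmetic behind this safe/unsafe pair: with i, j in [0,3] the product i*j stays in [0,9], within the 10-element array, so multiply1's check can go; with i, j in [-3,3] it spans [-9,9] and the first negative subscript throws, so multiply2's check must stay. The expected e1[0] == 7 in main() follows from the seven of sixteen (i, j) pairs whose product is 0 (i == 0 or j == 0: 4 + 4 - 1).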
+  /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (before)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  //
+  /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (after)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  private static int linearDynamicBCE1(int[] x, int lo, int hi) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      sResult += x[i];
+    }
+    return result;
+  }
+
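The three Deoptimize instructions expected above plausibly correspond to loop-invariant guards hoisted out of the loop. A hypothetical sketch of the transformed shape (deoptimize() is a stand-in, not a real API):

    if (x == null) deoptimize();        // hoisted null check
    if (lo < 0) deoptimize();           // lower bound holds for every i
    if (hi > x.length) deoptimize();    // upper bound holds for every i
    for (int i = lo; i < hi; i++) {
      sResult += x[i];                  // no per-iteration checks remain
    }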
+  /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (before)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  //
+  /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (after)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  private static int linearDynamicBCE2(int[] x, int lo, int hi, int offset) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      sResult += x[offset + i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  //
+  /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  private static int wrapAroundDynamicBCE(int[] x) {
+    int w = 9;
+    int result = 0;
+    for (int i = 0; i < 10; i++) {
+      result += x[w];
+      w = i;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  //
+  /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  private static int periodicDynamicBCE(int[] x) {
+    int k = 0;
+    int result = 0;
+    for (int i = 0; i < 10; i++) {
+      result += x[k];
+      k = 1 - k;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  //
+  /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  static int dynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
+    // This loop could be infinite for hi = Integer.MAX_VALUE. Since i is also
+    // used as a subscript, however, dynamic bce can proceed.
+    int result = 0;
+    for (int i = lo; i <= hi; i++) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  //
+  /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: Deoptimize
+  static int noDynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
+    // As above, but now the loop index is not used as a subscript,
+    // so dynamic bce is not applied.
+    int result = 0;
+    for (int k = 0, i = lo; i <= hi; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  //
+  /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (after)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: Deoptimize
+  static int noDynamicBCEMixedInductionTypes(int[] x, long lo, long hi) {
+    int result = 0;
+    // Mix of int and long induction.
+    int k = 0;
+    for (long i = lo; i < hi; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: NotEqual
+  /// CHECK-DAG: If
+  /// CHECK-DAG: If
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (after)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: NotEqual
+  /// CHECK-DAG: If
+  /// CHECK-DAG: If
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: ArrayGet
+  static int dynamicBCEAndConstantIndices(int[] x, int[][] a, int lo, int hi) {
+    // Deliberately test the length of a before the loop so that only bounds checks
+    // on constant subscripts remain, making them viable candidates for hoisting.
+    if (a.length == 0) {
+      return -1;
+    }
+    // Loop that allows BCE on x[i].
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      result += x[i];
+      if ((i % 10) != 0) {
+        // None of the subscripts inside a conditional are removed by dynamic bce,
+        // making them candidates for deoptimization based on constant indices.
+        // The compiler should ensure the array loads are not subsequently hoisted
+        // "above" the deoptimization "barrier" on the bounds.
+        a[0][i] = 1;
+        a[1][i] = 2;
+        a[99][i] = 3;
+      }
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], java.lang.Integer[], int, int) BCE (before)
+  /// CHECK-DAG: If
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  //
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], java.lang.Integer[], int, int) BCE (after)
+  /// CHECK-DAG: If
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: ArrayGet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  static int dynamicBCEAndConstantIndicesAllTypes(int[] q,
+                                                  boolean[] r,
+                                                  byte[] s,
+                                                  char[] t,
+                                                  short[] u,
+                                                  int[] v,
+                                                  long[] w,
+                                                  float[] x,
+                                                  double[] y,
+                                                  Integer[] z, int lo, int hi) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      result += q[i] + (r[0] ? 1 : 0) + (int) s[0] + (int) t[0] + (int) u[0] + (int) v[0] +
+                                        (int) w[0] + (int) x[0] + (int) y[0] + (int) z[0];
+    }
+    return result;
+  }
+
   //
   // Verifier.
   //
@@ -558,6 +1261,8 @@
     expectEquals(55, linearObscure(x));
     expectEquals(0, linearVeryObscure(empty));
     expectEquals(55, linearVeryObscure(x));
+    expectEquals(0, hiddenStride(empty));
+    expectEquals(55, hiddenStride(x));
     expectEquals(0, linearWhile(empty));
     expectEquals(55, linearWhile(x));
     expectEquals(0, linearThreeWayPhi(empty));
@@ -596,6 +1301,9 @@
     }
 
     // Linear with non-unit strides.
+    expectEquals(55, linearByTwo(x));
+    expectEquals(25, linearByTwoSkip1(x));
+    expectEquals(25, linearByTwoSkip2(x));
     expectEquals(56, linearWithCompoundStride());
     expectEquals(66, linearWithLargePositiveStride());
     expectEquals(66, linearWithVeryLargePositiveStride());
@@ -608,6 +1316,18 @@
     expectEquals(55, linearDoWhileUp());
     expectEquals(55, linearDoWhileDown());
     expectEquals(55, linearShort());
+    expectEquals(55, invariantFromPreLoop(x, 1));
+    linearTriangularOnTwoArrayLengths(10);
+    linearTriangularOnOneArrayLength(10);
+    linearTriangularOnParameter(10);
+    linearTriangularVariations(10);
+
+    // Sorting.
+    int[] sort = { 5, 4, 1, 9, 10, 2, 7, 6, 3, 8 };
+    bubble(sort);
+    for (int i = 0; i < 10; i++) {
+      expectEquals(sort[i], x[i]);
+    }
 
     // Periodic adds (1, 3), one at the time.
     expectEquals(0, periodicIdiom(-1));
@@ -690,6 +1410,100 @@
       sResult += 1000;
     }
     expectEquals(1055, sResult);
+
+    // Multiplication.
+    {
+      int[] e1 = { 7, 1, 2, 2, 1, 0, 2, 0, 0, 1 };
+      int[] a1 = multiply1();
+      for (int i = 0; i < 10; i++) {
+        expectEquals(a1[i], e1[i]);
+      }
+      int[] e2 = { 1001, 0, 0, 1, 0, 0, 1, 0, 0, 1 };
+      int[] a2 = multiply2();
+      for (int i = 0; i < 10; i++) {
+        expectEquals(a2[i], e2[i]);
+      }
+    }
+
+    // Dynamic BCE.
+    sResult = 0;
+    try {
+      linearDynamicBCE1(x, -1, x.length);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1000, sResult);
+    sResult = 0;
+    linearDynamicBCE1(x, 0, x.length);
+    expectEquals(55, sResult);
+    sResult = 0;
+    try {
+      linearDynamicBCE1(x, 0, x.length + 1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1055, sResult);
+
+    // Dynamic BCE with offset.
+    sResult = 0;
+    try {
+      linearDynamicBCE2(x, 0, x.length, -1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1000, sResult);
+    sResult = 0;
+    linearDynamicBCE2(x, 0, x.length, 0);
+    expectEquals(55, sResult);
+    sResult = 0;
+    try {
+      linearDynamicBCE2(x, 0, x.length, 1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1054, sResult);
+
+    // Dynamic BCE candidates.
+    expectEquals(55, wrapAroundDynamicBCE(x));
+    expectEquals(15, periodicDynamicBCE(x));
+    expectEquals(55, dynamicBCEPossiblyInfiniteLoop(x, 0, 9));
+    expectEquals(55, noDynamicBCEPossiblyInfiniteLoop(x, 0, 9));
+    expectEquals(55, noDynamicBCEMixedInductionTypes(x, 0, 10));
+
+    // Dynamic BCE combined with constant indices.
+    int[][] a;
+    a = new int[0][0];
+    expectEquals(-1, dynamicBCEAndConstantIndices(x, a, 0, 10));
+    a = new int[100][10];
+    expectEquals(55, dynamicBCEAndConstantIndices(x, a, 0, 10));
+    for (int i = 0; i < 10; i++) {
+      expectEquals((i % 10) != 0 ? 1 : 0, a[0][i]);
+      expectEquals((i % 10) != 0 ? 2 : 0, a[1][i]);
+      expectEquals((i % 10) != 0 ? 3 : 0, a[99][i]);
+    }
+    a = new int[2][10];
+    sResult = 0;
+    try {
+      expectEquals(55, dynamicBCEAndConstantIndices(x, a, 0, 10));
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult = 1;
+    }
+    expectEquals(1, sResult);
+    expectEquals(a[0][1], 1);
+    expectEquals(a[1][1], 2);
+
+    // Dynamic BCE combined with constant indices of all types.
+    boolean[] x1 = { true };
+    byte[] x2 = { 2 };
+    char[] x3 = { 3 };
+    short[] x4 = { 4 };
+    int[] x5 = { 5 };
+    long[] x6 = { 6 };
+    float[] x7 = { 7 };
+    double[] x8 = { 8 };
+    Integer[] x9 = { 9 };
+    expectEquals(505,
+        dynamicBCEAndConstantIndicesAllTypes(x, x1, x2, x3, x4, x5, x6, x7, x8, x9, 0, 10));
   }
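Arithmetic behind the expected 505 just above: x sums to 55 over i = 0..9, and each of the 10 iterations also adds 1 + 2 + ... + 9 = 45 from the constant-index loads, giving 55 + 10 * 45 = 505.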
 
   private static void expectEquals(int expected, int result) {
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index 13c4722..cadf706 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -25,6 +25,9 @@
 }
 
 class TestClass {
+  static {
+    sTestClassObj = new TestClass(-1, -2);
+  }
   TestClass() {
   }
   TestClass(int i, int j) {
@@ -37,6 +40,7 @@
   TestClass next;
   String str;
   static int si;
+  static TestClass sTestClassObj;
 }
 
 class SubTestClass extends TestClass {
@@ -48,6 +52,11 @@
   int j;
 }
 
+class TestClass3 {
+  float floatField = 8.0f;
+  boolean test1 = true;
+}
+
 class Finalizable {
   static boolean sVisited = false;
   static final int VALUE = 0xbeef;
@@ -115,10 +124,11 @@
   }
 
   /// CHECK-START: int Main.test3(TestClass) load_store_elimination (before)
-  /// CHECK: InstanceFieldSet
-  /// CHECK: InstanceFieldGet
-  /// CHECK: InstanceFieldSet
   /// CHECK: NewInstance
+  /// CHECK: StaticFieldGet
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
   /// CHECK: InstanceFieldSet
   /// CHECK: InstanceFieldSet
   /// CHECK: InstanceFieldGet
@@ -127,21 +137,31 @@
   /// CHECK: InstanceFieldGet
 
   /// CHECK-START: int Main.test3(TestClass) load_store_elimination (after)
-  /// CHECK: InstanceFieldSet
-  /// CHECK: InstanceFieldGet
-  /// CHECK: InstanceFieldSet
   /// CHECK: NewInstance
-  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK: StaticFieldGet
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
+  /// CHECK-NOT: StaticFieldGet
 
-  // A new allocation shouldn't alias with pre-existing values.
+  // A new allocation (even non-singleton) shouldn't alias with pre-existing values.
   static int test3(TestClass obj) {
+    // Do an allocation here to avoid the HLoadClass and HClinitCheck
+    // at the second allocation.
+    new TestClass();
+    TestClass obj1 = TestClass.sTestClassObj;
+    TestClass obj2 = new TestClass();  // Cannot alias with obj or obj1 which pre-exist.
+    obj.next = obj2;  // Make obj2 a non-singleton.
+    // All stores below need to stay since obj/obj1/obj2 are not singletons.
     obj.i = 1;
-    obj.next.j = 2;
-    TestClass obj2 = new TestClass();
+    obj1.j = 2;
+    // The following stores won't kill the values of obj.i and obj1.j.
     obj2.i = 3;
     obj2.j = 4;
-    return obj.i + obj.next.j + obj2.i + obj2.j;
+    return obj.i + obj1.j + obj2.i + obj2.j;
   }
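A standalone illustration of the aliasing argument in test3 (a sketch, not part of the test): a reference returned by new cannot equal any reference that existed before the allocation, so stores through it cannot invalidate previously recorded heap values.

    TestClass obj = new TestClass();
    obj.i = 1;
    TestClass obj2 = new TestClass();  // fresh: obj2 != obj is guaranteed
    obj2.i = 3;                        // cannot clobber obj.i
    int v = obj.i;                     // LSE may forward the constant 1 here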
 
   /// CHECK-START: int Main.test4(TestClass, boolean) load_store_elimination (before)
@@ -469,27 +489,32 @@
     return obj;
   }
 
-  /// CHECK-START: void Main.test21() load_store_elimination (before)
+  /// CHECK-START: void Main.test21(TestClass) load_store_elimination (before)
   /// CHECK: NewInstance
   /// CHECK: InstanceFieldSet
-  /// CHECK: StaticFieldSet
-  /// CHECK: StaticFieldGet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
 
-  /// CHECK-START: void Main.test21() load_store_elimination (after)
+  /// CHECK-START: void Main.test21(TestClass) load_store_elimination (after)
   /// CHECK: NewInstance
   /// CHECK: InstanceFieldSet
-  /// CHECK: StaticFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
   /// CHECK: InstanceFieldGet
 
   // Loop side effects can kill heap values; stores need to be kept in that case.
-  static void test21() {
+  static void test21(TestClass obj0) {
     TestClass obj = new TestClass();
+    obj0.str = "abc";
     obj.str = "abc";
     for (int i = 0; i < 2; i++) {
-      // Generate some loop side effect that does write.
-      obj.si = 1;
+      // Generate some loop side effect that writes into obj.
+      obj.str = "def";
     }
-    System.out.print(obj.str.substring(0, 0));
+    System.out.print(obj0.str.substring(0, 0) + obj.str.substring(0, 0));
   }
 
   /// CHECK-START: int Main.test22() load_store_elimination (before)
@@ -505,27 +530,29 @@
 
   /// CHECK-START: int Main.test22() load_store_elimination (after)
   /// CHECK: NewInstance
-  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldSet
   /// CHECK: NewInstance
   /// CHECK-NOT: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
   /// CHECK: NewInstance
   /// CHECK-NOT: InstanceFieldSet
-  /// CHECK: InstanceFieldGet
+  /// CHECK-NOT: InstanceFieldGet
   /// CHECK-NOT: InstanceFieldGet
 
-  // Loop side effects only affects stores into singletons that dominiates the loop header.
+  // For a singleton, loop side effects can kill its field values only if:
+  // (1) it dominates the loop header, and
+  // (2) its fields are written inside the loop.
   static int test22() {
     int sum = 0;
     TestClass obj1 = new TestClass();
-    obj1.i = 2;       // This store can't be eliminated since it can be killed by loop side effects.
+    obj1.i = 2;    // This store can be eliminated since obj1's fields are never written inside a loop.
     for (int i = 0; i < 2; i++) {
       TestClass obj2 = new TestClass();
-      obj2.i = 3;    // This store can be eliminated since the singleton is inside the loop.
+      obj2.i = 3;  // This store can be eliminated since the singleton is inside the loop.
       sum += obj2.i;
     }
     TestClass obj3 = new TestClass();
-    obj3.i = 5;      // This store can be eliminated since the singleton is created after the loop.
+    obj3.i = 5;    // This store can be eliminated since the singleton is created after the loop.
     sum += obj1.i + obj3.i;
     return sum;
   }
@@ -562,6 +589,37 @@
     return obj.i;
   }
 
+  /// CHECK-START: float Main.test24() load_store_elimination (before)
+  /// CHECK-DAG:     <<True:i\d+>>     IntConstant 1
+  /// CHECK-DAG:     <<Float8:f\d+>>   FloatConstant 8
+  /// CHECK-DAG:     <<Float42:f\d+>>  FloatConstant 42
+  /// CHECK-DAG:     <<Obj:l\d+>>      NewInstance
+  /// CHECK-DAG:                       InstanceFieldSet [<<Obj>>,<<True>>]
+  /// CHECK-DAG:                       InstanceFieldSet [<<Obj>>,<<Float8>>]
+  /// CHECK-DAG:     <<GetTest:z\d+>>  InstanceFieldGet [<<Obj>>]
+  /// CHECK-DAG:                       If [<<GetTest>>]
+  /// CHECK-DAG:     <<GetField:f\d+>> InstanceFieldGet [<<Obj>>]
+  /// CHECK-DAG:     <<Phi:f\d+>>      Phi [<<Float42>>,<<GetField>>]
+  /// CHECK-DAG:                       Return [<<Phi>>]
+
+  /// CHECK-START: float Main.test24() load_store_elimination (after)
+  /// CHECK-DAG:     <<True:i\d+>>     IntConstant 1
+  /// CHECK-DAG:     <<Float8:f\d+>>   FloatConstant 8
+  /// CHECK-DAG:     <<Float42:f\d+>>  FloatConstant 42
+  /// CHECK-DAG:     <<Obj:l\d+>>      NewInstance
+  /// CHECK-DAG:                       If [<<True>>]
+  /// CHECK-DAG:     <<Phi:f\d+>>      Phi [<<Float42>>,<<Float8>>]
+  /// CHECK-DAG:                       Return [<<Phi>>]
+
+  static float test24() {
+    float a = 42.0f;
+    TestClass3 obj = new TestClass3();
+    if (obj.test1) {
+      a = obj.floatField;
+    }
+    return a;
+  }
+
   /// CHECK-START: void Main.testFinalizable() load_store_elimination (before)
   /// CHECK: NewInstance
   /// CHECK: InstanceFieldSet
@@ -664,10 +722,11 @@
     float[] fa2 = { 1.8f };
     assertFloatEquals(test19(fa1, fa2), 1.8f);
     assertFloatEquals(test20().i, 0);
-    test21();
+    test21(new TestClass());
     assertIntEquals(test22(), 13);
     assertIntEquals(test23(true), 4);
     assertIntEquals(test23(false), 5);
+    assertFloatEquals(test24(), 8.0f);
     testFinalizableByForcingGc();
   }
 }
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
index 1b784ae..3f65d5a 100644
--- a/test/536-checker-intrinsic-optimization/src/Main.java
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -35,7 +35,7 @@
   }
 
   /// CHECK-START: boolean Main.stringEqualsNull() register (after)
-  /// CHECK:      <<Invoke:z\d+>> InvokeStaticOrDirect
+  /// CHECK:      <<Invoke:z\d+>> InvokeVirtual
   /// CHECK:      Return [<<Invoke>>]
   public static boolean stringEqualsNull() {
     String o = (String)myObject;
@@ -47,7 +47,7 @@
   }
 
   /// CHECK-START-X86: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
-  /// CHECK:          InvokeStaticOrDirect
+  /// CHECK:          InvokeVirtual
   /// CHECK-NOT:      test
   public static boolean stringArgumentNotNull(Object obj) {
     obj.getClass();
@@ -56,7 +56,7 @@
 
   // This test is very brittle, as it depends on the order in which we emit instructions.
   /// CHECK-START-X86: boolean Main.stringArgumentIsString() disassembly (after)
-  /// CHECK:      InvokeStaticOrDirect
+  /// CHECK:      InvokeVirtual
   /// CHECK:      test
   /// CHECK:      jz/eq
   // Check that we don't try to compare the classes.
diff --git a/test/538-checker-embed-constants/src/Main.java b/test/538-checker-embed-constants/src/Main.java
index 12f0380..f791adf 100644
--- a/test/538-checker-embed-constants/src/Main.java
+++ b/test/538-checker-embed-constants/src/Main.java
@@ -260,26 +260,43 @@
     return arg ^ 0xf00000000000000fL;
   }
 
-  /// CHECK-START-ARM: long Main.shl2(long) disassembly (after)
-  /// CHECK:                lsl{{s?|.w}} <<oh:r\d+>>, {{r\d+}}, #2
-  /// CHECK:                orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #30
-  /// CHECK-DAG:            lsl{{s?|.w}} {{r\d+}}, <<low>>, #2
+  /// CHECK-START-ARM: long Main.shl1(long) disassembly (after)
+  /// CHECK:                lsls{{(\.w)?}} {{r\d+}}, {{r\d+}}, #1
+  /// CHECK:                adc{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  /// CHECK-START-ARM: long Main.shl1(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  /// CHECK-START-X86: long Main.shl1(long) disassembly (after)
+  /// CHECK:                add
+  /// CHECK:                adc
+
+  /// CHECK-START-X86: long Main.shl1(long) disassembly (after)
+  /// CHECK-NOT:            shl
+
+  public static long shl1(long arg) {
+    return arg << 1;
+  }
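
The adds/adc sequence checked above works because doubling a 64-bit value held in a 32-bit register pair is just adding the pair to itself: the low word is doubled, and the carry out of that addition folds into the doubled high word. A minimal sketch of the equivalence (a hypothetical helper, not part of the test):

  static long shl1ViaAddWithCarry(long arg) {
    int lo = (int) arg;
    int hi = (int) (arg >>> 32);
    int newLo = lo + lo;              // "adds": doubles the low word, producing a carry
    int carry = lo >>> 31;            // the carry out of lo + lo is the old top bit of lo
    int newHi = hi + hi + carry;      // "adc": doubles the high word and adds the carry in
    return ((long) newHi << 32) | (newLo & 0xffffffffL);
  }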
 
   /// CHECK-START-ARM: long Main.shl2(long) disassembly (after)
-  /// CHECK-NOT:            lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK:                lsl{{s?|\.w}} <<oh:r\d+>>, {{r\d+}}, #2
+  /// CHECK:                orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #30
+  /// CHECK:                lsl{{s?|\.w}} {{r\d+}}, <<low>>, #2
+
+  /// CHECK-START-ARM: long Main.shl2(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long shl2(long arg) {
-    // Note: Shl(x, 1) is transformed to Add(x, x), so test Shl(x, 2).
     return arg << 2;
   }
 
   /// CHECK-START-ARM: long Main.shl31(long) disassembly (after)
-  /// CHECK:                lsl{{s?|.w}} <<oh:r\d+>>, {{r\d+}}, #31
+  /// CHECK:                lsl{{s?|\.w}} <<oh:r\d+>>, {{r\d+}}, #31
   /// CHECK:                orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #1
-  /// CHECK:                lsl{{s?|.w}} {{r\d+}}, <<low>>, #31
+  /// CHECK:                lsl{{s?|\.w}} {{r\d+}}, <<low>>, #31
 
   /// CHECK-START-ARM: long Main.shl31(long) disassembly (after)
-  /// CHECK-NOT:            lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long shl31(long arg) {
     return arg << 31;
@@ -287,114 +304,136 @@
 
   /// CHECK-START-ARM: long Main.shl32(long) disassembly (after)
   /// CHECK-DAG:            mov {{r\d+}}, {{r\d+}}
-  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
 
   /// CHECK-START-ARM: long Main.shl32(long) disassembly (after)
-  /// CHECK-NOT:            lsl{{s?|.w}}
+  /// CHECK-NOT:            lsl{{s?|\.w}}
 
   public static long shl32(long arg) {
     return arg << 32;
   }
 
   /// CHECK-START-ARM: long Main.shl33(long) disassembly (after)
-  /// CHECK-DAG:            lsl{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #1
-  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+  /// CHECK-DAG:            lsl{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #1
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
 
   /// CHECK-START-ARM: long Main.shl33(long) disassembly (after)
-  /// CHECK-NOT:            lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long shl33(long arg) {
     return arg << 33;
   }
 
   /// CHECK-START-ARM: long Main.shl63(long) disassembly (after)
-  /// CHECK-DAG:            lsl{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31
-  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+  /// CHECK-DAG:            lsl{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
 
   /// CHECK-START-ARM: long Main.shl63(long) disassembly (after)
-  /// CHECK-NOT:            lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long shl63(long arg) {
     return arg << 63;
   }
 
   /// CHECK-START-ARM: long Main.shr1(long) disassembly (after)
-  /// CHECK:                lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #1
-  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #31
-  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high>>, #1
+  /// CHECK:                asrs{{(\.w)?}} {{r\d+}}, {{r\d+}}, #1
+  /// CHECK:                mov.w {{r\d+}}, {{r\d+}}, rrx
 
   /// CHECK-START-ARM: long Main.shr1(long) disassembly (after)
-  /// CHECK-NOT:            asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long shr1(long arg) {
     return arg >> 1;
   }
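
The asrs/rrx pair checked above exploits ARM's rotate-right-with-extend: the arithmetic shift of the high word leaves its ejected low bit in the carry flag, and RRX then rotates that carry into the top of the low word. A sketch of the equivalence (hypothetical helper, not part of the test); ushr1 below is lowered the same way, with lsrs in place of asrs:

  static long shr1ViaRrx(long arg) {
    int lo = (int) arg;
    int hi = (int) (arg >>> 32);
    int carry = hi & 1;                       // the bit shifted out of the high word
    int newHi = hi >> 1;                      // "asrs": arithmetic shift that sets the carry
    int newLo = (lo >>> 1) | (carry << 31);   // "rrx": the carry becomes the new top bit
    return ((long) newHi << 32) | (newLo & 0xffffffffL);
  }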
 
-  /// CHECK-START-ARM: long Main.shr31(long) disassembly (after)
-  /// CHECK:                lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #31
-  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1
-  /// CHECK:                asr{{s?|.w}} {{r\d+}}, <<high>>, #31
+  /// CHECK-START-ARM: long Main.shr2(long) disassembly (after)
+  /// CHECK:                lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #2
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #30
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high>>, #2
+
+  /// CHECK-START-ARM: long Main.shr2(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shr2(long arg) {
+    return arg >> 2;
+  }
 
   /// CHECK-START-ARM: long Main.shr31(long) disassembly (after)
-  /// CHECK-NOT:            asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK:                lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #31
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1
+  /// CHECK:                asr{{s?|\.w}} {{r\d+}}, <<high>>, #31
+
+  /// CHECK-START-ARM: long Main.shr31(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long shr31(long arg) {
     return arg >> 31;
   }
 
   /// CHECK-START-ARM: long Main.shr32(long) disassembly (after)
-  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31
   /// CHECK-DAG:            mov {{r\d+}}, <<high>>
 
   /// CHECK-START-ARM: long Main.shr32(long) disassembly (after)
-  /// CHECK-NOT:            asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
-  /// CHECK-NOT:            lsr{{s?|.w}}
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            lsr{{s?|\.w}}
 
   public static long shr32(long arg) {
     return arg >> 32;
   }
 
   /// CHECK-START-ARM: long Main.shr33(long) disassembly (after)
-  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #1
-  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high>>, #31
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #1
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high>>, #31
 
   /// CHECK-START-ARM: long Main.shr33(long) disassembly (after)
-  /// CHECK-NOT:            asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long shr33(long arg) {
     return arg >> 33;
   }
 
   /// CHECK-START-ARM: long Main.shr63(long) disassembly (after)
-  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31
-  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high>>, #31
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high>>, #31
 
   /// CHECK-START-ARM: long Main.shr63(long) disassembly (after)
-  /// CHECK-NOT:            asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long shr63(long arg) {
     return arg >> 63;
   }
 
   /// CHECK-START-ARM: long Main.ushr1(long) disassembly (after)
-  /// CHECK:                lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #1
-  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #31
-  /// CHECK-DAG:            lsr{{s?|.w}} {{r\d+}}, <<high>>, #1
+  /// CHECK:                lsrs{{(\.w)?}} {{r\d+}}, {{r\d+}}, #1
+  /// CHECK:                mov.w {{r\d+}}, {{r\d+}}, rrx
 
   /// CHECK-START-ARM: long Main.ushr1(long) disassembly (after)
-  /// CHECK-NOT:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long ushr1(long arg) {
     return arg >>> 1;
   }
 
-  /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after)
-  /// CHECK:                lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #31
-  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1
-  /// CHECK:                lsr{{s?|.w}} {{r\d+}}, <<high>>, #31
+  /// CHECK-START-ARM: long Main.ushr2(long) disassembly (after)
+  /// CHECK:                lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #2
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #30
+  /// CHECK-DAG:            lsr{{s?|\.w}} {{r\d+}}, <<high>>, #2
+
+  /// CHECK-START-ARM: long Main.ushr2(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long ushr2(long arg) {
+    return arg >>> 2;
+  }
 
   /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after)
-  /// CHECK-NOT:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK:                lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #31
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1
+  /// CHECK:                lsr{{s?|\.w}} {{r\d+}}, <<high>>, #31
+
+  /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long ushr31(long arg) {
     return arg >>> 31;
@@ -402,32 +441,32 @@
 
   /// CHECK-START-ARM: long Main.ushr32(long) disassembly (after)
   /// CHECK-DAG:            mov {{r\d+}}, {{r\d+}}
-  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
 
   /// CHECK-START-ARM: long Main.ushr32(long) disassembly (after)
-  /// CHECK-NOT:            lsr{{s?|.w}}
+  /// CHECK-NOT:            lsr{{s?|\.w}}
 
   public static long ushr32(long arg) {
     return arg >>> 32;
   }
 
   /// CHECK-START-ARM: long Main.ushr33(long) disassembly (after)
-  /// CHECK-DAG:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, #1
-  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+  /// CHECK-DAG:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, #1
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
 
   /// CHECK-START-ARM: long Main.ushr33(long) disassembly (after)
-  /// CHECK-NOT:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long ushr33(long arg) {
     return arg >>> 33;
   }
 
   /// CHECK-START-ARM: long Main.ushr63(long) disassembly (after)
-  /// CHECK-DAG:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, #31
-  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+  /// CHECK-DAG:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, #31
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
 
   /// CHECK-START-ARM: long Main.ushr63(long) disassembly (after)
-  /// CHECK-NOT:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long ushr63(long arg) {
     return arg >>> 63;
@@ -485,11 +524,13 @@
 
     assertLongEquals(14, addM1(7));
 
+    assertLongEquals(shl1(longArg), 0x2468acf10eca8642L);
     assertLongEquals(shl2(longArg), 0x48d159e21d950c84L);
     assertLongEquals(shl31(longArg), 0x43b2a19080000000L);
     assertLongEquals(shl32(longArg), 0x8765432100000000L);
     assertLongEquals(shl33(longArg), 0x0eca864200000000L);
     assertLongEquals(shl63(longArg), 0x8000000000000000L);
+    assertLongEquals(shl1(~longArg), 0xdb97530ef13579bcL);
     assertLongEquals(shl2(~longArg), 0xb72ea61de26af378L);
     assertLongEquals(shl31(~longArg), 0xbc4d5e6f00000000L);
     assertLongEquals(shl32(~longArg), 0x789abcde00000000L);
@@ -497,22 +538,26 @@
     assertLongEquals(shl63(~longArg), 0x0000000000000000L);
 
     assertLongEquals(shr1(longArg), 0x091a2b3c43b2a190L);
+    assertLongEquals(shr2(longArg), 0x048d159e21d950c8L);
     assertLongEquals(shr31(longArg), 0x000000002468acf1L);
     assertLongEquals(shr32(longArg), 0x0000000012345678L);
     assertLongEquals(shr33(longArg), 0x00000000091a2b3cL);
     assertLongEquals(shr63(longArg), 0x0000000000000000L);
     assertLongEquals(shr1(~longArg), 0xf6e5d4c3bc4d5e6fL);
+    assertLongEquals(shr2(~longArg), 0xfb72ea61de26af37L);
     assertLongEquals(shr31(~longArg), 0xffffffffdb97530eL);
     assertLongEquals(shr32(~longArg), 0xffffffffedcba987L);
     assertLongEquals(shr33(~longArg), 0xfffffffff6e5d4c3L);
     assertLongEquals(shr63(~longArg), 0xffffffffffffffffL);
 
     assertLongEquals(ushr1(longArg), 0x091a2b3c43b2a190L);
+    assertLongEquals(ushr2(longArg), 0x048d159e21d950c8L);
     assertLongEquals(ushr31(longArg), 0x000000002468acf1L);
     assertLongEquals(ushr32(longArg), 0x0000000012345678L);
     assertLongEquals(ushr33(longArg), 0x00000000091a2b3cL);
     assertLongEquals(ushr63(longArg), 0x0000000000000000L);
     assertLongEquals(ushr1(~longArg), 0x76e5d4c3bc4d5e6fL);
+    assertLongEquals(ushr2(~longArg), 0x3b72ea61de26af37L);
     assertLongEquals(ushr31(~longArg), 0x00000001db97530eL);
     assertLongEquals(ushr32(~longArg), 0x00000000edcba987L);
     assertLongEquals(ushr33(~longArg), 0x0000000076e5d4c3L);
diff --git a/test/542-bitfield-rotates/expected.txt b/test/542-bitfield-rotates/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/542-bitfield-rotates/expected.txt
diff --git a/test/542-bitfield-rotates/info.txt b/test/542-bitfield-rotates/info.txt
new file mode 100644
index 0000000..961be3b
--- /dev/null
+++ b/test/542-bitfield-rotates/info.txt
@@ -0,0 +1 @@
+Tests bitfield rotate simplification in the optimizing compiler.
diff --git a/test/542-bitfield-rotates/src/Main.java b/test/542-bitfield-rotates/src/Main.java
new file mode 100644
index 0000000..f2bc153
--- /dev/null
+++ b/test/542-bitfield-rotates/src/Main.java
@@ -0,0 +1,423 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Expected: " + expected + ", found: " + actual);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long actual) {
+    if (expected != actual) {
+      throw new Error("Expected: " + expected + ", found: " + actual);
+    }
+  }
+
+  public static void main(String args[]) throws Exception {
+    test_Integer_right_v_csubv();
+    test_Long_right_v_csubv();
+
+    test_Integer_right_constant_v();
+    test_Long_right_constant_v();
+
+    test_Integer_left_csubv_v();
+    test_Long_left_csubv_v();
+
+    test_Integer_right_v_negv();
+    test_Long_right_v_negv();
+
+    test_Integer_left_negv_v();
+    test_Long_left_negv_v();
+
+    test_Integer_left_constant_v();
+    test_Long_left_constant_v();
+  }
+
+  public static boolean doThrow = false;
+
+  public static int $noinline$rotate_int_right_reg_v_csubv(int value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> distance) | (value << (32 - distance));
+  }
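
The `(value >>> distance) | (value << (32 - distance))` shape above is the pattern the simplifier recognizes as a rotate right, and it agrees with Integer.rotateRight for every distance because Java masks int shift distances to their low five bits. A quick sanity check (an assumed helper, not part of the test):

  static void checkRotateRightPattern(int value, int distance) {
    int pattern = (value >>> distance) | (value << (32 - distance));
    if (pattern != Integer.rotateRight(value, distance)) {
      throw new AssertionError("rotate pattern mismatch");
    }
  }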
+
+  public static void test_Integer_right_v_csubv() throws Exception {
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, 0), 0x11);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, 1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, Integer.SIZE - 1), 0x22);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, Integer.SIZE + 1), 0x80000008);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, -1), 0x22);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, -(Integer.SIZE - 1)), 0x80000008);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, -Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x11, -(Integer.SIZE + 1)), 0x22);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_csubv(0x80000000, 1), 0x40000000);
+  }
+
+  public static long $noinline$rotate_long_right_reg_v_csubv(long value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> distance) | (value << (64 - distance));
+  }
+
+  public static void test_Long_right_v_csubv() throws Exception {
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, 0), 0x11);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, 1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, Long.SIZE - 1), 0x22);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, Long.SIZE + 1), 0x8000000000000008L);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, -1), 0x22);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, -(Long.SIZE - 1)), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, -Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x11, -(Long.SIZE + 1)), 0x22);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_csubv(0x8000000000000000L, 1), 0x4000000000000000L);
+  }
+
+  public static int $noinline$rotate_int_left_reg_csubv_v(int value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> (32 - distance)) | (value << distance);
+  }
+
+  public static void test_Integer_left_csubv_v() throws Exception {
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, 0), 0x11);
+
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, 1), 0x22);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, Integer.SIZE - 1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, Integer.SIZE + 1), 0x22);
+
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, -1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, -(Integer.SIZE - 1)), 0x22);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, -Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0x11, -(Integer.SIZE + 1)), 0x80000008);
+
+    assertIntEquals($noinline$rotate_int_left_reg_csubv_v(0xC0000000, 1), 0x80000001);
+  }
+
+  public static long $noinline$rotate_long_left_reg_csubv_v(long value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> (64 - distance)) | (value << distance);
+  }
+
+  public static void test_Long_left_csubv_v() throws Exception {
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, 0), 0x11);
+
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, 1), 0x22);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, Long.SIZE - 1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, Long.SIZE + 1), 0x22);
+
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, -1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, -(Long.SIZE - 1)), 0x22);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, -Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0x11, -(Long.SIZE + 1)), 0x8000000000000008L);
+
+    assertLongEquals($noinline$rotate_long_left_reg_csubv_v(0xC000000000000000L, 1), 0x8000000000000001L);
+  }
+
+  public static int $noinline$rotate_int_right_reg_v_negv(int value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> distance) | (value << -distance);
+  }
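
The negated-distance form above rotates correctly for the same reason: shift distances are masked to five bits, so `-distance & 31` equals `(32 - distance) & 31`, and `value << -distance` denotes the complementary left shift. An illustrative check (assumed helper, not part of the test):

  static void checkNegatedDistance(int value, int distance) {
    if ((value << -distance) != (value << (32 - distance))) {
      throw new AssertionError("shift-distance masking assumption violated");
    }
  }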
+
+  public static void test_Integer_right_v_negv() throws Exception {
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, 0), 0x11);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, 1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, Integer.SIZE - 1), 0x22);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, Integer.SIZE + 1), 0x80000008);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, -1), 0x22);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, -(Integer.SIZE - 1)), 0x80000008);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, -Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x11, -(Integer.SIZE + 1)), 0x22);
+
+    assertIntEquals($noinline$rotate_int_right_reg_v_negv(0x80000000, 1), 0x40000000);
+  }
+
+  public static long $noinline$rotate_long_right_reg_v_negv(long value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> distance) | (value << -distance);
+  }
+
+  public static void test_Long_right_v_negv() throws Exception {
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, 0), 0x11);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, 1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, Long.SIZE - 1), 0x22);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, Long.SIZE + 1), 0x8000000000000008L);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, -1), 0x22);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, -(Long.SIZE - 1)), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, -Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x11, -(Long.SIZE + 1)), 0x22);
+
+    assertLongEquals($noinline$rotate_long_right_reg_v_negv(0x8000000000000000L, 1), 0x4000000000000000L);
+  }
+
+  public static int $noinline$rotate_int_left_reg_negv_v(int value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> -distance) | (value << distance);
+  }
+
+  public static void test_Integer_left_negv_v() throws Exception {
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, 0), 0x11);
+
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, 1), 0x22);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, Integer.SIZE - 1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, Integer.SIZE + 1), 0x22);
+
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, -1), 0x80000008);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, -(Integer.SIZE - 1)), 0x22);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, -Integer.SIZE), 0x11);
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0x11, -(Integer.SIZE + 1)), 0x80000008);
+
+    assertIntEquals($noinline$rotate_int_left_reg_negv_v(0xC0000000, 1), 0x80000001);
+  }
+
+  public static long $noinline$rotate_long_left_reg_negv_v(long value, int distance) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> -distance) | (value << distance);
+  }
+
+  public static void test_Long_left_negv_v() throws Exception {
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, 0), 0x11);
+
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, 1), 0x22);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, Long.SIZE - 1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, Long.SIZE + 1), 0x22);
+
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, -1), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, -(Long.SIZE - 1)), 0x22);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, -Long.SIZE), 0x11);
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0x11, -(Long.SIZE + 1)), 0x8000000000000008L);
+
+    assertLongEquals($noinline$rotate_long_left_reg_negv_v(0xC000000000000000L, 1), 0x8000000000000001L);
+  }
+
+  public static int $noinline$rotate_int_right_constant_0(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 0) | (value << 0);
+  }
+
+  public static int $noinline$rotate_int_right_constant_1(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 1) | (value << -1);
+  }
+
+  public static int $noinline$rotate_int_right_constant_m1(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> -1) | (value << 1);
+  }
+
+  public static int $noinline$rotate_int_right_constant_16(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 16) | (value << -16);
+  }
+
+  public static void test_Integer_right_constant_v() throws Exception {
+    assertIntEquals($noinline$rotate_int_right_constant_0(0x11), 0x11);
+    assertIntEquals($noinline$rotate_int_right_constant_1(0x11), 0x80000008);
+    assertIntEquals($noinline$rotate_int_right_constant_m1(0x11), 0x22);
+    assertIntEquals($noinline$rotate_int_right_constant_16(0x11), 0x110000);
+  }
+
+  public static long $noinline$rotate_long_right_constant_0(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 0) | (value << 0);
+  }
+
+  public static long $noinline$rotate_long_right_constant_1(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 1) | (value << -1);
+  }
+
+  public static long $noinline$rotate_long_right_constant_m1(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> -1) | (value << 1);
+  }
+
+  public static long $noinline$rotate_long_right_constant_16(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 16) | (value << -16);
+  }
+
+  public static long $noinline$rotate_long_right_constant_32(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 32) | (value << -32);
+  }
+
+  public static long $noinline$rotate_long_right_constant_48(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 48) | (value << -48);
+  }
+
+  public static long $noinline$rotate_long_right_constant_64(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value >>> 64) | (value << -64);
+  }
+
+  public static void test_Long_right_constant_v() throws Exception {
+    assertLongEquals($noinline$rotate_long_right_constant_0(0x11), 0x11);
+    assertLongEquals($noinline$rotate_long_right_constant_1(0x11), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_right_constant_m1(0x11), 0x22);
+    assertLongEquals($noinline$rotate_long_right_constant_16(0x11), 0x11000000000000L);
+    assertLongEquals($noinline$rotate_long_right_constant_32(0x11), 0x1100000000L);
+    assertLongEquals($noinline$rotate_long_right_constant_48(0x11), 0x110000L);
+  }
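
Note that the constant-64 helper above is not exercised by these assertions: long shift distances are masked to six bits, so a rotation by 64 degenerates to `(value >>> 0) | (value << 0)`, i.e. the identity. A one-line check of that assumption (hypothetical helper, not part of the test):

  static void checkRotate64IsIdentity(long value) {
    if (((value >>> 64) | (value << -64)) != value) {
      throw new AssertionError("rotate by 64 should be the identity");
    }
  }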
+
+  public static int $noinline$rotate_int_left_constant_0(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 0) | (value >>> 0);
+  }
+
+  public static int $noinline$rotate_int_left_constant_1(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 1) | (value >>> -1);
+  }
+
+  public static int $noinline$rotate_int_left_constant_m1(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << -1) | (value >>> 1);
+  }
+
+  public static int $noinline$rotate_int_left_constant_16(int value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 16) | (value >>> -16);
+  }
+
+  public static void test_Integer_left_constant_v() throws Exception {
+    assertIntEquals($noinline$rotate_int_left_constant_0(0x11), 0x11);
+    assertIntEquals($noinline$rotate_int_left_constant_1(0x11), 0x22);
+    assertIntEquals($noinline$rotate_int_left_constant_m1(0x11), 0x80000008);
+    assertIntEquals($noinline$rotate_int_left_constant_16(0x11), 0x110000);
+  }
+
+  public static long $noinline$rotate_long_left_constant_0(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 0) | (value >>> 0);
+  }
+
+  public static long $noinline$rotate_long_left_constant_1(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 1) | (value >>> -1);
+  }
+
+  public static long $noinline$rotate_long_left_constant_m1(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << -1) | (value >>> 1);
+  }
+
+  public static long $noinline$rotate_long_left_constant_16(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 16) | (value >>> -16);
+  }
+
+  public static long $noinline$rotate_long_left_constant_32(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 32) | (value >>> -32);
+  }
+
+  public static long $noinline$rotate_long_left_constant_48(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 48) | (value >>> -48);
+  }
+
+  public static long $noinline$rotate_long_left_constant_64(long value) {
+    if (doThrow) {
+      throw new Error();
+    }
+    return (value << 64) | (value >>> -64);
+  }
+
+  public static void test_Long_left_constant_v() throws Exception {
+    assertLongEquals($noinline$rotate_long_left_constant_0(0x11), 0x11);
+    assertLongEquals($noinline$rotate_long_left_constant_1(0x11), 0x22);
+    assertLongEquals($noinline$rotate_long_left_constant_m1(0x11), 0x8000000000000008L);
+    assertLongEquals($noinline$rotate_long_left_constant_16(0x11), 0x110000L);
+    assertLongEquals($noinline$rotate_long_left_constant_32(0x11), 0x1100000000L);
+    assertLongEquals($noinline$rotate_long_left_constant_48(0x11), 0x11000000000000L);
+  }
+
+}
diff --git a/test/543-checker-dce-trycatch/smali/TestCase.smali b/test/543-checker-dce-trycatch/smali/TestCase.smali
index 44e907d..1756fa4 100644
--- a/test/543-checker-dce-trycatch/smali/TestCase.smali
+++ b/test/543-checker-dce-trycatch/smali/TestCase.smali
@@ -202,27 +202,35 @@
 # Test that DCE removes catch phi uses of instructions defined in dead try blocks.
 
 ## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination_final (before)
-## CHECK-DAG:     <<Arg0:i\d+>>     ParameterValue
-## CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
-## CHECK-DAG:     <<Const0xa:i\d+>> IntConstant 10
-## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
-## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
-## CHECK-DAG:     <<Const0xd:i\d+>> IntConstant 13
-## CHECK-DAG:     <<Const0xe:i\d+>> IntConstant 14
-## CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg0>>,<<Arg1>>]
-## CHECK-DAG:                       Phi [<<Const0xa>>,<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
-## CHECK-DAG:                       Phi [<<Add>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
+## CHECK-DAG:     <<Arg0:i\d+>>      ParameterValue
+## CHECK-DAG:     <<Arg1:i\d+>>      ParameterValue
+## CHECK-DAG:     <<Const0xa:i\d+>>  IntConstant 10
+## CHECK-DAG:     <<Const0xb:i\d+>>  IntConstant 11
+## CHECK-DAG:     <<Const0xc:i\d+>>  IntConstant 12
+## CHECK-DAG:     <<Const0xd:i\d+>>  IntConstant 13
+## CHECK-DAG:     <<Const0xe:i\d+>>  IntConstant 14
+## CHECK-DAG:     <<Const0xf:i\d+>>  IntConstant 15
+## CHECK-DAG:     <<Const0x10:i\d+>> IntConstant 16
+## CHECK-DAG:     <<Const0x11:i\d+>> IntConstant 17
+## CHECK-DAG:     <<Add:i\d+>>       Add [<<Arg0>>,<<Arg1>>]
+## CHECK-DAG:     <<Phi:i\d+>>       Phi [<<Add>>,<<Const0xf>>] reg:3 is_catch_phi:false
+## CHECK-DAG:                        Phi [<<Const0xa>>,<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
+## CHECK-DAG:                        Phi [<<Add>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
+## CHECK-DAG:                        Phi [<<Phi>>,<<Const0x10>>,<<Const0x11>>] reg:3 is_catch_phi:true
 
 ## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination_final (after)
-## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
-## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
-## CHECK-DAG:     <<Const0xd:i\d+>> IntConstant 13
-## CHECK-DAG:     <<Const0xe:i\d+>> IntConstant 14
-## CHECK-DAG:                       Phi [<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
-## CHECK-DAG:                       Phi [<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
+## CHECK-DAG:     <<Const0xb:i\d+>>  IntConstant 11
+## CHECK-DAG:     <<Const0xc:i\d+>>  IntConstant 12
+## CHECK-DAG:     <<Const0xd:i\d+>>  IntConstant 13
+## CHECK-DAG:     <<Const0xe:i\d+>>  IntConstant 14
+## CHECK-DAG:     <<Const0x10:i\d+>> IntConstant 16
+## CHECK-DAG:     <<Const0x11:i\d+>> IntConstant 17
+## CHECK-DAG:                        Phi [<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
+## CHECK-DAG:                        Phi [<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
+## CHECK-DAG:                        Phi [<<Const0x10>>,<<Const0x11>>] reg:3 is_catch_phi:true
 
 .method public static testCatchPhiInputs_DefinedInTryBlock(IIII)I
-    .registers 7
+    .registers 8
 
     invoke-static {}, LTestCase;->$inline$False()Z
     move-result v0
@@ -232,17 +240,24 @@
     shr-int/2addr p2, p3
 
     :try_start
-    const v1, 0xa           # dead catch phi input, defined in entry block
-    add-int v2, p0, p1      # dead catch phi input, defined in the dead block
+    const v1, 0xa           # dead catch phi input, defined in entry block (HInstruction)
+    add-int v2, p0, p1      # dead catch phi input, defined in the dead block (HInstruction)
+    move v3, v2
+    if-eqz v3, :define_phi
+    const v3, 0xf
+    :define_phi
+    # v3 = Phi [Add, 0xf]   # dead catch phi input, defined in the dead block (HPhi)
     div-int/2addr p0, v2
 
     :else
     const v1, 0xb           # live catch phi input
     const v2, 0xc           # live catch phi input
+    const v3, 0x10          # live catch phi input
     div-int/2addr p0, p3
 
     const v1, 0xd           # live catch phi input
     const v2, 0xe           # live catch phi input
+    const v3, 0x11          # live catch phi input
     div-int/2addr p0, p1
     :try_end
     .catchall {:try_start .. :try_end} :catch_all
@@ -252,6 +267,7 @@
 
     :catch_all
     sub-int p0, v1, v2      # use catch phi values
+    sub-int p0, p0, v3      # use catch phi values
     goto :return
 
 .end method
@@ -260,8 +276,6 @@
 # dead try blocks.
 
 ## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination_final (before)
-## CHECK-DAG:     <<Arg0:i\d+>>     ParameterValue
-## CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
 ## CHECK-DAG:     <<Const0xa:i\d+>> IntConstant 10
 ## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
 ## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
diff --git a/test/543-env-long-ref/env_long_ref.cc b/test/543-env-long-ref/env_long_ref.cc
new file mode 100644
index 0000000..4108323
--- /dev/null
+++ b/test/543-env-long-ref/env_long_ref.cc
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arch/context.h"
+#include "art_method-inl.h"
+#include "jni.h"
+#include "scoped_thread_state_change.h"
+#include "stack.h"
+#include "thread.h"
+
+namespace art {
+
+namespace {
+
+class TestVisitor : public StackVisitor {
+ public:
+  TestVisitor(const ScopedObjectAccess& soa, Context* context, jobject expected_value)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      : StackVisitor(soa.Self(), context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        expected_value_(expected_value),
+        found_(false),
+        soa_(soa) {}
+
+  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtMethod* m = GetMethod();
+    std::string m_name(m->GetName());
+
+    if (m_name == "testCase") {
+      found_ = true;
+      uint32_t value = 0;
+      CHECK(GetVReg(m, 1, kReferenceVReg, &value));
+      CHECK_EQ(reinterpret_cast<mirror::Object*>(value),
+               soa_.Decode<mirror::Object*>(expected_value_));
+    }
+    return true;
+  }
+
+  jobject expected_value_;
+  bool found_;
+  const ScopedObjectAccess& soa_;
+};
+
+}  // namespace
+
+extern "C" JNIEXPORT void JNICALL Java_Main_lookForMyRegisters(JNIEnv*, jclass, jobject value) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::unique_ptr<Context> context(Context::Create());
+  TestVisitor visitor(soa, context.get(), value);
+  visitor.WalkStack();
+  CHECK(visitor.found_);
+}
+
+}  // namespace art
diff --git a/test/543-env-long-ref/expected.txt b/test/543-env-long-ref/expected.txt
new file mode 100644
index 0000000..89f155b
--- /dev/null
+++ b/test/543-env-long-ref/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+42
diff --git a/test/543-env-long-ref/info.txt b/test/543-env-long-ref/info.txt
new file mode 100644
index 0000000..6a42533
--- /dev/null
+++ b/test/543-env-long-ref/info.txt
@@ -0,0 +1,3 @@
+Regression test for the optimizing compiler, which used to return
+the wrong dex register in debuggable mode when a new value
+overwrote the high dex register of a wide value.
diff --git a/test/543-env-long-ref/smali/TestCase.smali b/test/543-env-long-ref/smali/TestCase.smali
new file mode 100644
index 0000000..608d6eb
--- /dev/null
+++ b/test/543-env-long-ref/smali/TestCase.smali
@@ -0,0 +1,26 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+.method public static testCase()I
+  .registers 5
+  const-wide/16 v0, 0x1
+  invoke-static {v0, v1}, LMain;->$noinline$allocate(J)LMain;
+  move-result-object v1
+  invoke-static {v1}, LMain;->lookForMyRegisters(LMain;)V
+  iget v2, v1, LMain;->field:I
+  return v2
+.end method
diff --git a/test/543-env-long-ref/src/Main.java b/test/543-env-long-ref/src/Main.java
new file mode 100644
index 0000000..e723789
--- /dev/null
+++ b/test/543-env-long-ref/src/Main.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Throwable {
+    System.loadLibrary(args[0]);
+    Class<?> c = Class.forName("TestCase");
+    Method m = c.getMethod("testCase");
+    Integer a = (Integer)m.invoke(null, (Object[]) null);
+    System.out.println(a);
+  }
+
+  public static Main $noinline$allocate(long a) {
+    try {
+      return new Main();
+    } catch (Exception e) {
+      throw new Error(e);
+    }
+  }
+
+  public static native void lookForMyRegisters(Main m);
+
+  int field = 42;
+}
diff --git a/test/550-checker-multiply-accumulate/expected.txt b/test/550-checker-multiply-accumulate/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/550-checker-multiply-accumulate/expected.txt
diff --git a/test/550-checker-multiply-accumulate/info.txt b/test/550-checker-multiply-accumulate/info.txt
new file mode 100644
index 0000000..10e998c
--- /dev/null
+++ b/test/550-checker-multiply-accumulate/info.txt
@@ -0,0 +1 @@
+Test the merging of multiply and add/sub instructions into multiply-accumulate instructions on arm64.
diff --git a/test/550-checker-multiply-accumulate/src/Main.java b/test/550-checker-multiply-accumulate/src/Main.java
new file mode 100644
index 0000000..2d0688d
--- /dev/null
+++ b/test/550-checker-multiply-accumulate/src/Main.java
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // A dummy value to defeat inlining of these routines.
+  static boolean doThrow = false;
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /**
+   * Test basic merging of `MUL+ADD` into `MULADD`.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<MulAdd:i\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Add
+  /// CHECK:                            Return [<<MulAdd>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Add
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) disassembly (after)
+  /// CHECK:                            madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
+
+  public static int $opt$noinline$mulAdd(int acc, int left, int right) {
+    if (doThrow) throw new Error();
+    return acc + left * right;
+  }
+
+  /**
+   * Test basic merging of `MUL+SUB` into `MULSUB`.
+   */
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:                            Return [<<Sub>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<MulSub:j\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub
+  /// CHECK:                            Return [<<MulSub>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Sub
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) disassembly (after)
+  /// CHECK:                            msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
+
+  public static long $opt$noinline$mulSub(long acc, long left, long right) {
+    if (doThrow) throw new Error();
+    return acc - left * right;
+  }
+
+  /**
+   * Test that we do not create a multiply-accumulate instruction when there
+   * are other uses of the multiplication that cannot merge it.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Mul>>,<<Add>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Mul>>,<<Add>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64MultiplyAccumulate
+
+  public static int $opt$noinline$multipleUses1(int acc, int left, int right) {
+    if (doThrow) throw new Error();
+    int temp = left * right;
+    return temp | (acc + temp);
+  }
+
+  /**
+   * Test that we do not create a multiply-accumulate instruction even when all
+   * uses of the multiplication can merge it.
+   */
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:j\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Res:j\d+>>         Add [<<Add>>,<<Sub>>]
+  /// CHECK:                            Return [<<Res>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:j\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Res:j\d+>>         Add [<<Add>>,<<Sub>>]
+  /// CHECK:                            Return [<<Res>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64MultiplyAccumulate
+
+
+  public static long $opt$noinline$multipleUses2(long acc, long left, long right) {
+    if (doThrow) throw new Error();
+    long temp = left * right;
+    return (acc + temp) + (acc - temp);
+  }
+
+
+  /**
+   * Test the interpretation of `a * (b + 1)` as `a + (a * b)`.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Var:i\d+>>         ParameterValue
+  /// CHECK:       <<Const1:i\d+>>      IntConstant 1
+  /// CHECK:       <<Add:i\d+>>         Add [<<Var>>,<<Const1>>]
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Acc>>,<<Add>>]
+  /// CHECK:                            Return [<<Mul>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Var:i\d+>>         ParameterValue
+  /// CHECK:       <<MulAdd:i\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Add
+  /// CHECK:                            Return [<<MulAdd>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Add
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) disassembly (after)
+  /// CHECK:                            madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
+
+  public static int $opt$noinline$mulPlusOne(int acc, int var) {
+    if (doThrow) throw new Error();
+    return acc * (var + 1);
+  }
+
+
+  /**
+   * Test the interpretation of `a * (1 - b)` as `a - (a * b)`.
+   */
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Var:j\d+>>         ParameterValue
+  /// CHECK:       <<Const1:j\d+>>      LongConstant 1
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Const1>>,<<Var>>]
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Acc>>,<<Sub>>]
+  /// CHECK:                            Return [<<Mul>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Var:j\d+>>         ParameterValue
+  /// CHECK:       <<MulSub:j\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Sub
+  /// CHECK:                            Return [<<MulSub>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Sub
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) disassembly (after)
+  /// CHECK:                            msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
+
+  public static long $opt$noinline$mulMinusOne(long acc, long var) {
+    if (doThrow) throw new Error();
+    return acc * (1 - var);
+  }
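
The two rewrites exercised above rest on ring identities that hold exactly in Java's wrapping two's-complement arithmetic: `a * (b + 1) == a + a * b` (the madd form) and `a * (1 - b) == a - a * b` (the msub form). A sketch of both (an assumed helper, not part of the test):

  static void checkMultiplyAccumulateIdentities(long acc, long var) {
    // a * (b + 1) == a + a * b, the shape lowered to madd
    if (acc * (var + 1) != acc + acc * var) {
      throw new AssertionError("madd identity violated");
    }
    // a * (1 - b) == a - a * b, the shape lowered to msub
    if (acc * (1 - var) != acc - acc * var) {
      throw new AssertionError("msub identity violated");
    }
  }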
+
+
+  public static void main(String[] args) {
+    assertIntEquals(7, $opt$noinline$mulAdd(1, 2, 3));
+    assertLongEquals(-26, $opt$noinline$mulSub(4, 5, 6));
+    assertIntEquals(79, $opt$noinline$multipleUses1(7, 8, 9));
+    assertLongEquals(20, $opt$noinline$multipleUses2(10, 11, 12));
+    assertIntEquals(195, $opt$noinline$mulPlusOne(13, 14));
+    assertLongEquals(-225, $opt$noinline$mulMinusOne(15, 16));
+  }
+}
diff --git a/test/550-checker-regression-wide-store/expected.txt b/test/550-checker-regression-wide-store/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/550-checker-regression-wide-store/expected.txt
diff --git a/test/550-checker-regression-wide-store/info.txt b/test/550-checker-regression-wide-store/info.txt
new file mode 100644
index 0000000..6cf04bc
--- /dev/null
+++ b/test/550-checker-regression-wide-store/info.txt
@@ -0,0 +1,3 @@
+Test an SsaBuilder regression where storing into the high vreg of a pair
+would not invalidate the low vreg. The resulting environment would generate
+an incorrect stack map, causing deopt and try/catch to use a wrong location.
\ No newline at end of file
diff --git a/test/550-checker-regression-wide-store/smali/TestCase.smali b/test/550-checker-regression-wide-store/smali/TestCase.smali
new file mode 100644
index 0000000..7974d56
--- /dev/null
+++ b/test/550-checker-regression-wide-store/smali/TestCase.smali
@@ -0,0 +1,82 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+.method public static $noinline$throw()V
+  .registers 1
+  new-instance v0, Ljava/lang/Exception;
+  invoke-direct {v0}, Ljava/lang/Exception;-><init>()V
+  throw v0
+.end method
+
+# Test storing into the high vreg of a wide pair. This scenario has runtime
+# behaviour implications, so we run it from Main.main.
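+# (In this method v0/v1 hold the wide pair; `long-to-int v1, v0` then writes
+# an int into v1, the pair's high vreg, which must invalidate the pair value.)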
+
+## CHECK-START: int TestCase.invalidateLow(long) ssa_builder (after)
+## CHECK-DAG: <<Cst0:i\d+>> IntConstant 0
+## CHECK-DAG: <<Arg:j\d+>>  ParameterValue
+## CHECK-DAG: <<Cast:i\d+>> TypeConversion [<<Arg>>]
+## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[_,<<Cst0>>,<<Arg>>,_]]
+## CHECK-DAG: InvokeStaticOrDirect method_name:TestCase.$noinline$throw  env:[[_,<<Cast>>,<<Arg>>,_]]
+
+.method public static invalidateLow(J)I
+  .registers 4
+
+  const/4 v1, 0x0
+
+  :try_start
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  move-wide v0, p0
+  long-to-int v1, v0
+  invoke-static {}, LTestCase;->$noinline$throw()V
+  :try_end
+  .catchall {:try_start .. :try_end} :catchall
+
+  :catchall
+  return v1
+
+.end method
+
+# Test that storing a wide value invalidates the value in the high vreg. This
+# cannot be detected at runtime, so we only test the environment with Checker.
+
+## CHECK-START: void TestCase.invalidateHigh1(long) ssa_builder (after)
+## CHECK-DAG: <<Arg:j\d+>>  ParameterValue
+## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[<<Arg>>,_,<<Arg>>,_]]
+
+.method public static invalidateHigh1(J)V
+  .registers 4
+
+  const/4 v1, 0x0
+  move-wide v0, p0
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  return-void
+
+.end method
+
+## CHECK-START: void TestCase.invalidateHigh2(long) ssa_builder (after)
+## CHECK-DAG: <<Arg:j\d+>>  ParameterValue
+## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[<<Arg>>,_,_,<<Arg>>,_]]
+
+.method public static invalidateHigh2(J)V
+  .registers 5
+
+  move-wide v1, p0
+  move-wide v0, p0
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  return-void
+
+.end method
diff --git a/test/550-checker-regression-wide-store/src/Main.java b/test/550-checker-regression-wide-store/src/Main.java
new file mode 100644
index 0000000..9b502df
--- /dev/null
+++ b/test/550-checker-regression-wide-store/src/Main.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
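+  // TestCase is compiled from smali, so Main resolves and invokes it
+  // reflectively rather than referencing it directly.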
+  private static int runTestCase(String name, long arg) throws Exception {
+    Class<?> c = Class.forName("TestCase");
+    Method m = c.getMethod(name, long.class);
+    int result = (Integer) m.invoke(null, arg);
+    return result;
+  }
+
+  private static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Wrong result: " + expected + " != " + actual);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    assertEquals(42, runTestCase("invalidateLow", 42L));
+  }
+}
diff --git a/test/550-new-instance-clinit/expected.txt b/test/550-new-instance-clinit/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/550-new-instance-clinit/expected.txt
diff --git a/test/550-new-instance-clinit/info.txt b/test/550-new-instance-clinit/info.txt
new file mode 100644
index 0000000..c5fa3c7
--- /dev/null
+++ b/test/550-new-instance-clinit/info.txt
@@ -0,0 +1,3 @@
+Regression test for the optimizing compiler, which used to treat
+HNewInstance as not having side effects even though it
+could invoke a clinit method.
diff --git a/test/550-new-instance-clinit/src/Main.java b/test/550-new-instance-clinit/src/Main.java
new file mode 100644
index 0000000..45e259e
--- /dev/null
+++ b/test/550-new-instance-clinit/src/Main.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
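+    // Read Main.a, let Bar's <clinit> (run by new Bar()) increment it, then
+    // re-read: the second read must observe 43. Treating HNewInstance as
+    // side-effect free would wrongly allow reusing the first read.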
+    int foo = Main.a;
+    new Bar();
+    foo = Main.a;
+    if (foo != 43) {
+      throw new Error("Expected 43, got " + foo);
+    }
+  }
+  static int a = 42;
+}
+
+class Bar {
+  static {
+    Main.a++;
+  }
+}
diff --git a/test/551-checker-clinit/expected.txt b/test/551-checker-clinit/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/551-checker-clinit/expected.txt
diff --git a/test/551-checker-clinit/info.txt b/test/551-checker-clinit/info.txt
new file mode 100644
index 0000000..4d54bb5
--- /dev/null
+++ b/test/551-checker-clinit/info.txt
@@ -0,0 +1 @@
+Checker test to ensure we optimize away HClinitChecks as expected.
diff --git a/test/551-checker-clinit/src/Main.java b/test/551-checker-clinit/src/Main.java
new file mode 100644
index 0000000..5ec30480
--- /dev/null
+++ b/test/551-checker-clinit/src/Main.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {}
+  public static int foo = 42;
+
+  /// CHECK-START: void Main.inlinedMethod() builder (after)
+  /// CHECK:                        ClinitCheck
+
+  /// CHECK-START: void Main.inlinedMethod() inliner (after)
+  /// CHECK:                        ClinitCheck
+  /// CHECK-NOT:                    ClinitCheck
+  /// CHECK-NOT:                    InvokeStaticOrDirect
+  public void inlinedMethod() {
+    SubSub.bar();
+  }
+}
+
+class Sub extends Main {
+  /// CHECK-START: void Sub.invokeSuperClass() builder (after)
+  /// CHECK-NOT:                        ClinitCheck
+  public void invokeSuperClass() {
+    int a = Main.foo;
+  }
+
+  /// CHECK-START: void Sub.invokeItself() builder (after)
+  /// CHECK-NOT:                        ClinitCheck
+  public void invokeItself() {
+    int a = foo;
+  }
+
+  /// CHECK-START: void Sub.invokeSubClass() builder (after)
+  /// CHECK:                            ClinitCheck
+  public void invokeSubClass() {
+    int a = SubSub.foo;
+  }
+
+  public static int foo = 42;
+}
+
+class SubSub {
+  public static void bar() {
+    int a = Main.foo;
+  }
+  public static int foo = 42;
+}
diff --git a/test/551-checker-shifter-operand/build b/test/551-checker-shifter-operand/build
new file mode 100644
index 0000000..18e8c59
--- /dev/null
+++ b/test/551-checker-shifter-operand/build
@@ -0,0 +1,212 @@
+#!/bin/bash
+#
+# Copyright (C) 2008 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# This is an almost exact copy of `art/test/etc/default-build`. Only the
+# parsing of the `dx` option has been overridden.
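+#
+# For example, the run-test harness may pass `--dx-option --no-optimize`;
+# that flag is deliberately dropped below so `dx` still runs with
+# optimizations enabled. (Illustrative; the actual flags come from the
+# harness.)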
+
+# Stop if something fails.
+set -e
+
+# Set default values for directories.
+if [ -d smali ]; then
+  HAS_SMALI=true
+else
+  HAS_SMALI=false
+fi
+
+if [ -d src ]; then
+  HAS_SRC=true
+else
+  HAS_SRC=false
+fi
+
+if [ -d src2 ]; then
+  HAS_SRC2=true
+else
+  HAS_SRC2=false
+fi
+
+if [ -d src-multidex ]; then
+  HAS_SRC_MULTIDEX=true
+else
+  HAS_SRC_MULTIDEX=false
+fi
+
+if [ -d src-ex ]; then
+  HAS_SRC_EX=true
+else
+  HAS_SRC_EX=false
+fi
+
+DX_FLAGS=""
+SKIP_DX_MERGER="false"
+EXPERIMENTAL=""
+
+# Setup experimental flag mappings in a bash associative array.
+declare -A JACK_EXPERIMENTAL_ARGS
+JACK_EXPERIMENTAL_ARGS["default-methods"]="-D jack.java.source.version=1.8"
+JACK_EXPERIMENTAL_ARGS["lambdas"]="-D jack.java.source.version=1.8"
+
+while true; do
+  if [ "x$1" = "x--dx-option" ]; then
+    shift
+    option="$1"
+    # Make sure we run this test *with* `dx` optimizations.
+    if [ "x$option" != "x--no-optimize" ]; then
+      DX_FLAGS="${DX_FLAGS} $option"
+    fi
+    shift
+  elif [ "x$1" = "x--jvm" ]; then
+    shift
+  elif [ "x$1" = "x--no-src" ]; then
+    HAS_SRC=false
+    shift
+  elif [ "x$1" = "x--no-src2" ]; then
+    HAS_SRC2=false
+    shift
+  elif [ "x$1" = "x--no-src-multidex" ]; then
+    HAS_SRC_MULTIDEX=false
+    shift
+  elif [ "x$1" = "x--no-src-ex" ]; then
+    HAS_SRC_EX=false
+    shift
+  elif [ "x$1" = "x--no-smali" ]; then
+    HAS_SMALI=false
+    shift
+  elif [ "x$1" = "x--experimental" ]; then
+    shift
+    EXPERIMENTAL="${EXPERIMENTAL} $1"
+    shift
+  elif expr "x$1" : "x--" >/dev/null 2>&1; then
+    echo "unknown $0 option: $1" 1>&2
+    exit 1
+  else
+    break
+  fi
+done
+
+# Add args from the experimental mappings.
+for experiment in ${EXPERIMENTAL}; do
+  JACK_ARGS="${JACK_ARGS} ${JACK_EXPERIMENTAL_ARGS[${experiment}]}"
+done
+
+if [ -e classes.dex ]; then
+  zip $TEST_NAME.jar classes.dex
+  exit 0
+fi
+
+if ! [ "${HAS_SRC}" = "true" ] && ! [ "${HAS_SRC2}" = "true" ]; then
+  # No src directory? Then forget about trying to run dx.
+  SKIP_DX_MERGER="true"
+fi
+
+if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then
+  # Jack does not support this configuration unless we specify how to partition the DEX file
+  # with a .jpp file.
+  USE_JACK="false"
+fi
+
+if [ ${USE_JACK} = "true" ]; then
+  # Jack toolchain
+  if [ "${HAS_SRC}" = "true" ]; then
+    ${JACK} ${JACK_ARGS} --output-jack src.jack src
+    imported_jack_files="--import src.jack"
+  fi
+
+  if [ "${HAS_SRC2}" = "true" ]; then
+    ${JACK} ${JACK_ARGS} --output-jack src2.jack src2
+    imported_jack_files="--import src2.jack ${imported_jack_files}"
+  fi
+
+  # Compile jack files into a DEX file. We set jack.import.type.policy=keep-first to consider
+  # class definitions from src2 first.
+  if [ "${HAS_SRC}" = "true" ] || [ "${HAS_SRC2}" = "true" ]; then
+    ${JACK} ${JACK_ARGS} ${imported_jack_files} -D jack.import.type.policy=keep-first --output-dex .
+  fi
+else
+  # Legacy toolchain with javac+dx
+  if [ "${HAS_SRC}" = "true" ]; then
+    mkdir classes
+    ${JAVAC} ${JAVAC_ARGS} -implicit:none -classpath src-multidex -d classes `find src -name '*.java'`
+  fi
+
+  if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then
+    mkdir classes2
+    ${JAVAC} -implicit:none -classpath src -d classes2 `find src-multidex -name '*.java'`
+    if [ ${NEED_DEX} = "true" ]; then
+      ${DX} -JXmx256m --debug --dex --dump-to=classes2.lst --output=classes2.dex \
+        --dump-width=1000 ${DX_FLAGS} classes2
+    fi
+  fi
+
+  if [ "${HAS_SRC2}" = "true" ]; then
+    mkdir -p classes
+    ${JAVAC} ${JAVAC_ARGS} -d classes `find src2 -name '*.java'`
+  fi
+
+  if [ "${HAS_SRC}" = "true" ] || [ "${HAS_SRC2}" = "true" ]; then
+    if [ ${NEED_DEX} = "true" -a ${SKIP_DX_MERGER} = "false" ]; then
+      ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex \
+        --dump-width=1000 ${DX_FLAGS} classes
+    fi
+  fi
+fi
+
+if [ "${HAS_SMALI}" = "true" ]; then
+  # Compile Smali classes
+  ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'`
+
+  # Don't bother with dexmerger if we provide our own main function in a smali file.
+  if [ ${SKIP_DX_MERGER} = "false" ]; then
+    ${DXMERGER} classes.dex classes.dex smali_classes.dex
+  else
+    mv smali_classes.dex classes.dex
+  fi
+fi
+
+if [ ${HAS_SRC_EX} = "true" ]; then
+  if [ ${USE_JACK} = "true" ]; then
+      # Rename previous "classes.dex" so it is not overwritten.
+      mv classes.dex classes-1.dex
+      #TODO find another way to append src.jack to the jack classpath
+      ${JACK}:src.jack ${JACK_ARGS} --output-dex . src-ex
+      zip $TEST_NAME-ex.jar classes.dex
+      # Restore previous "classes.dex" so it can be zipped.
+      mv classes-1.dex classes.dex
+  else
+    mkdir classes-ex
+    ${JAVAC} ${JAVAC_ARGS} -d classes-ex -cp classes `find src-ex -name '*.java'`
+    if [ ${NEED_DEX} = "true" ]; then
+      ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes-ex.dex \
+        --dump-width=1000 ${DX_FLAGS} classes-ex
+
+      # quick shuffle so that the stored name is "classes.dex"
+      mv classes.dex classes-1.dex
+      mv classes-ex.dex classes.dex
+      zip $TEST_NAME-ex.jar classes.dex
+      mv classes.dex classes-ex.dex
+      mv classes-1.dex classes.dex
+    fi
+  fi
+fi
+
+# Create a single jar with two dex files for multidex.
+if [ ${HAS_SRC_MULTIDEX} = "true" ]; then
+  zip $TEST_NAME.jar classes.dex classes2.dex
+elif [ ${NEED_DEX} = "true" ]; then
+  zip $TEST_NAME.jar classes.dex
+fi
diff --git a/test/551-checker-shifter-operand/expected.txt b/test/551-checker-shifter-operand/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/551-checker-shifter-operand/expected.txt
diff --git a/test/551-checker-shifter-operand/info.txt b/test/551-checker-shifter-operand/info.txt
new file mode 100644
index 0000000..10e998c
--- /dev/null
+++ b/test/551-checker-shifter-operand/info.txt
@@ -0,0 +1 @@
+Test the merging of instructions into the shifter operand on arm64.
diff --git a/test/551-checker-shifter-operand/src/Main.java b/test/551-checker-shifter-operand/src/Main.java
new file mode 100644
index 0000000..decdd1f
--- /dev/null
+++ b/test/551-checker-shifter-operand/src/Main.java
@@ -0,0 +1,678 @@
+/*
+* Copyright (C) 2015 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+public class Main {
+
+  // A dummy value to defeat inlining of these routines.
+  static boolean doThrow = false;
+
+  public static void assertByteEquals(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertCharEquals(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertShortEquals(short expected, short result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  // Non-inlinable type-casting helpers.
+  static  char $noinline$byteToChar   (byte v) { if (doThrow) throw new Error(); return  (char)v; }
+  static short $noinline$byteToShort  (byte v) { if (doThrow) throw new Error(); return (short)v; }
+  static   int $noinline$byteToInt    (byte v) { if (doThrow) throw new Error(); return   (int)v; }
+  static  long $noinline$byteToLong   (byte v) { if (doThrow) throw new Error(); return  (long)v; }
+  static  byte $noinline$charToByte   (char v) { if (doThrow) throw new Error(); return  (byte)v; }
+  static short $noinline$charToShort  (char v) { if (doThrow) throw new Error(); return (short)v; }
+  static   int $noinline$charToInt    (char v) { if (doThrow) throw new Error(); return   (int)v; }
+  static  long $noinline$charToLong   (char v) { if (doThrow) throw new Error(); return  (long)v; }
+  static  byte $noinline$shortToByte (short v) { if (doThrow) throw new Error(); return  (byte)v; }
+  static  char $noinline$shortToChar (short v) { if (doThrow) throw new Error(); return  (char)v; }
+  static   int $noinline$shortToInt  (short v) { if (doThrow) throw new Error(); return   (int)v; }
+  static  long $noinline$shortToLong (short v) { if (doThrow) throw new Error(); return  (long)v; }
+  static  byte $noinline$intToByte     (int v) { if (doThrow) throw new Error(); return  (byte)v; }
+  static  char $noinline$intToChar     (int v) { if (doThrow) throw new Error(); return  (char)v; }
+  static short $noinline$intToShort    (int v) { if (doThrow) throw new Error(); return (short)v; }
+  static  long $noinline$intToLong     (int v) { if (doThrow) throw new Error(); return  (long)v; }
+  static  byte $noinline$longToByte   (long v) { if (doThrow) throw new Error(); return  (byte)v; }
+  static  char $noinline$longToChar   (long v) { if (doThrow) throw new Error(); return  (char)v; }
+  static short $noinline$longToShort  (long v) { if (doThrow) throw new Error(); return (short)v; }
+  static   int $noinline$longToInt    (long v) { if (doThrow) throw new Error(); return   (int)v; }
+
+  /**
+   * Basic test merging a bitfield move operation (here a type conversion) into
+   * the shifter operand.
+   */
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (before)
+  /// CHECK-DAG:   <<l:j\d+>>           ParameterValue
+  /// CHECK-DAG:   <<b:b\d+>>           ParameterValue
+  /// CHECK:       <<tmp:j\d+>>         TypeConversion [<<b>>]
+  /// CHECK:                            Sub [<<l>>,<<tmp>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:   <<l:j\d+>>           ParameterValue
+  /// CHECK-DAG:   <<b:b\d+>>           ParameterValue
+  /// CHECK:                            Arm64DataProcWithShifterOp [<<l>>,<<b>>] kind:Sub+SXTB
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        TypeConversion
+  /// CHECK-NOT:                        Sub
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) disassembly (after)
+  /// CHECK:                            sub x{{\d+}}, x{{\d+}}, w{{\d+}}, sxtb
+
+  public static long $opt$noinline$translate(long l, byte b) {
+    if (doThrow) throw new Error();
+    long tmp = (long)b;
+    return l - tmp;
+  }
+
+
+  /**
+   * Test that we do not merge into the shifter operand when the left and right
+   * inputs are the same IR node.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<a:i\d+>>           ParameterValue
+  /// CHECK:       <<Const2:i\d+>>      IntConstant 2
+  /// CHECK:       <<tmp:i\d+>>         Shl [<<a>>,<<Const2>>]
+  /// CHECK:                            Add [<<tmp>>,<<tmp>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:   <<a:i\d+>>           ParameterValue
+  /// CHECK-DAG:   <<Const2:i\d+>>      IntConstant 2
+  /// CHECK:       <<Shl:i\d+>>         Shl [<<a>>,<<Const2>>]
+  /// CHECK:                            Add [<<Shl>>,<<Shl>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  public static int $opt$noinline$sameInput(int a) {
+    if (doThrow) throw new Error();
+    int tmp = a << 2;
+    return tmp + tmp;
+  }
+
+  /**
+   * Check that we perform the merge for multiple uses.
+   */
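+  // (Each of the five uses of `tmp` below absorbs the shift into its own
+  // data-processing instruction, so no standalone Shl remains afterwards.)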
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<arg:i\d+>>         ParameterValue
+  /// CHECK:       <<Const23:i\d+>>     IntConstant 23
+  /// CHECK:       <<tmp:i\d+>>         Shl [<<arg>>,<<Const23>>]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<arg:i\d+>>         ParameterValue
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Shl
+  /// CHECK-NOT:                        Add
+
+  public static int $opt$noinline$multipleUses(int arg) {
+    if (doThrow) throw new Error();
+    int tmp = arg << 23;
+    switch (arg) {
+      case 1:  return (arg | 1) + tmp;
+      case 2:  return (arg | 2) + tmp;
+      case 3:  return (arg | 3) + tmp;
+      case 4:  return (arg | 4) + tmp;
+      case (1 << 20):  return (arg | 5) + tmp;
+      default: return 0;
+    }
+  }
+
+  /**
+   * Logical instructions cannot take 'extend' operations into the shift
+   * operand, so test that only the shifts are merged.
+   */
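+  // (ARM64 AND/ORR/EOR accept a shifted register operand but, unlike
+  // ADD/SUB, have no extended-register form, so only shifts can merge here.)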
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testAnd(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testAnd(long, long) disassembly (after)
+  /// CHECK:                            and lsl
+  /// CHECK:                            sxtb
+  /// CHECK:                            and
+
+  static void $opt$noinline$testAnd(long a, long b) {
+    if (doThrow) throw new Error();
+    assertLongEquals((a & $noinline$LongShl(b, 5)) | (a & $noinline$longToByte(b)),
+                     (a & (b << 5)) | (a & (byte)b));
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testOr(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testOr(int, int) disassembly (after)
+  /// CHECK:                            orr asr
+  /// CHECK:                            uxth
+  /// CHECK:                            orr
+
+  static void $opt$noinline$testOr(int a, int b) {
+    if (doThrow) throw new Error();
+    assertIntEquals((a | $noinline$IntShr(b, 6)) | (a | $noinline$intToChar(b)),
+                    (a | (b >> 6)) | (a | (char)b));
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testXor(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testXor(long, long) disassembly (after)
+  /// CHECK:                            eor lsr
+  /// CHECK:                            sxtw
+  /// CHECK:                            eor
+
+  static void $opt$noinline$testXor(long a, long b) {
+    if (doThrow) throw new Error();
+    assertLongEquals((a ^ $noinline$LongUshr(b, 7)) | (a ^ $noinline$longToInt(b)),
+                     (a ^ (b >>> 7)) | (a ^ (int)b));
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testNeg(int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testNeg(int) disassembly (after)
+  /// CHECK:                            neg lsl
+  /// CHECK:                            sxth
+  /// CHECK:                            neg
+
+  static void $opt$noinline$testNeg(int a) {
+    if (doThrow) throw new Error();
+    assertIntEquals(-$noinline$IntShl(a, 8) | -$noinline$intToShort(a),
+                    (-(a << 8)) | (-(short)a));
+  }
+
+  /**
+   * The functions below are used to compare the result of optimized operations
+   * to non-optimized operations.
+   * On the left-hand side we use a non-inlined function call to ensure the
+   * optimization does not occur. The checker tests ensure that the optimization
+   * does occur on the right-hand side.
+   */
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt1(int, byte) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt1(int, byte) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendByteInt1(int a, byte b) {
+    assertIntEquals(a + $noinline$byteToChar (b), a +  (char)b);
+    assertIntEquals(a + $noinline$byteToShort(b), a + (short)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt2(int, byte) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  public static void $opt$validateExtendByteInt2(int a, byte b) {
+    // The conversion to `int` has been optimized away, so there is nothing to merge.
+    assertIntEquals (a + $noinline$byteToInt (b), a +  (int)b);
+    // There is an environment use for `(long)b`, preventing the merge.
+    assertLongEquals(a + $noinline$byteToLong(b), a + (long)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteLong(long, byte) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteLong(long, byte) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendByteLong(long a, byte b) {
+    // The first two tests have a type conversion.
+    assertLongEquals(a + $noinline$byteToChar (b), a +  (char)b);
+    assertLongEquals(a + $noinline$byteToShort(b), a + (short)b);
+    // The third does not, because the conversion to `int` is optimized away.
+    assertLongEquals(a + $noinline$byteToInt  (b), a +  (int)b);
+  }
+
+  public static void $opt$validateExtendByte(long a, byte b) {
+    $opt$validateExtendByteInt1((int)a, b);
+    $opt$validateExtendByteInt2((int)a, b);
+    $opt$validateExtendByteLong(a, b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt1(int, char) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt1(int, char) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendCharInt1(int a, char b) {
+    assertIntEquals(a + $noinline$charToByte (b), a +  (byte)b);
+    assertIntEquals(a + $noinline$charToShort(b), a + (short)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt2(int, char) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  public static void $opt$validateExtendCharInt2(int a, char b) {
+    // The conversion to `int` has been optimized away, so there is nothing to merge.
+    assertIntEquals (a + $noinline$charToInt (b), a +  (int)b);
+    // There is an environment use for `(long)b`, preventing the merge.
+    assertLongEquals(a + $noinline$charToLong(b), a + (long)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharLong(long, char) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharLong(long, char) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendCharLong(long a, char b) {
+    // The first two tests have a type conversion.
+    assertLongEquals(a + $noinline$charToByte (b), a +  (byte)b);
+    assertLongEquals(a + $noinline$charToShort(b), a + (short)b);
+    // The third does not, because the conversion to `int` is optimized away.
+    assertLongEquals(a + $noinline$charToInt  (b), a +   (int)b);
+  }
+
+  public static void $opt$validateExtendChar(long a, char b) {
+    $opt$validateExtendCharInt1((int)a, b);
+    $opt$validateExtendCharInt2((int)a, b);
+    $opt$validateExtendCharLong(a, b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt1(int, short) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt1(int, short) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendShortInt1(int a, short b) {
+    assertIntEquals(a + $noinline$shortToByte (b), a + (byte)b);
+    assertIntEquals(a + $noinline$shortToChar (b), a + (char)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt2(int, short) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  public static void $opt$validateExtendShortInt2(int a, short b) {
+    // The conversion to `int` has been optimized away, so there is nothing to merge.
+    assertIntEquals (a + $noinline$shortToInt  (b), a +  (int)b);
+    // There is an environment use for `(long)b`, preventing the merge.
+    assertLongEquals(a + $noinline$shortToLong (b), a + (long)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortLong(long, short) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortLong(long, short) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendShortLong(long a, short b) {
+    // The first two tests have a type conversion.
+    assertLongEquals(a + $noinline$shortToByte(b), a + (byte)b);
+    assertLongEquals(a + $noinline$shortToChar(b), a + (char)b);
+    // The third does not, because the conversion to `int` is optimized away.
+    assertLongEquals(a + $noinline$shortToInt (b), a +  (int)b);
+  }
+
+  public static void $opt$validateExtendShort(long a, short b) {
+    $opt$validateExtendShortInt1((int)a, b);
+    $opt$validateExtendShortInt2((int)a, b);
+    $opt$validateExtendShortLong(a, b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendInt(long, int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendInt(long, int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendInt(long a, int b) {
+    // All tests have a conversion to `long`. The first three tests also have a
+    // conversion from `int` to the specified type. For each test the conversion
+    // to `long` is merged into the shifter operand.
+    assertLongEquals(a + $noinline$intToByte (b), a +  (byte)b);
+    assertLongEquals(a + $noinline$intToChar (b), a +  (char)b);
+    assertLongEquals(a + $noinline$intToShort(b), a + (short)b);
+    assertLongEquals(a + $noinline$intToLong (b), a +  (long)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendLong(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendLong(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendLong(long a, long b) {
+    // Each test has two conversions, from `long` and then back to `long`. The
+    // conversions to `long` are merged.
+    assertLongEquals(a + $noinline$longToByte (b), a +  (byte)b);
+    assertLongEquals(a + $noinline$longToChar (b), a +  (char)b);
+    assertLongEquals(a + $noinline$longToShort(b), a + (short)b);
+    assertLongEquals(a + $noinline$longToInt  (b), a +   (int)b);
+  }
+
+
+  static int $noinline$IntShl(int b, int c) {
+    if (doThrow) throw new Error();
+    return b << c;
+  }
+  static int $noinline$IntShr(int b, int c) {
+    if (doThrow) throw new Error();
+    return b >> c;
+  }
+  static int $noinline$IntUshr(int b, int c) {
+    if (doThrow) throw new Error();
+    return b >>> c;
+  }
+
+
+  // Each test line below should see one merge.
+  /// CHECK-START-ARM64: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Shl
+  /// CHECK-NOT:                        Shr
+  /// CHECK-NOT:                        UShr
+
+  public static void $opt$validateShiftInt(int a, int b) {
+    assertIntEquals(a + $noinline$IntShl(b, 1),   a + (b <<  1));
+    assertIntEquals(a + $noinline$IntShl(b, 6),   a + (b <<  6));
+    assertIntEquals(a + $noinline$IntShl(b, 7),   a + (b <<  7));
+    assertIntEquals(a + $noinline$IntShl(b, 8),   a + (b <<  8));
+    assertIntEquals(a + $noinline$IntShl(b, 14),  a + (b << 14));
+    assertIntEquals(a + $noinline$IntShl(b, 15),  a + (b << 15));
+    assertIntEquals(a + $noinline$IntShl(b, 16),  a + (b << 16));
+    assertIntEquals(a + $noinline$IntShl(b, 30),  a + (b << 30));
+    assertIntEquals(a + $noinline$IntShl(b, 31),  a + (b << 31));
+    assertIntEquals(a + $noinline$IntShl(b, 32),  a + (b << 32));
+    assertIntEquals(a + $noinline$IntShl(b, 62),  a + (b << 62));
+    assertIntEquals(a + $noinline$IntShl(b, 63),  a + (b << 63));
+
+    assertIntEquals(a - $noinline$IntShr(b, 1),   a - (b >>  1));
+    assertIntEquals(a - $noinline$IntShr(b, 6),   a - (b >>  6));
+    assertIntEquals(a - $noinline$IntShr(b, 7),   a - (b >>  7));
+    assertIntEquals(a - $noinline$IntShr(b, 8),   a - (b >>  8));
+    assertIntEquals(a - $noinline$IntShr(b, 14),  a - (b >> 14));
+    assertIntEquals(a - $noinline$IntShr(b, 15),  a - (b >> 15));
+    assertIntEquals(a - $noinline$IntShr(b, 16),  a - (b >> 16));
+    assertIntEquals(a - $noinline$IntShr(b, 30),  a - (b >> 30));
+    assertIntEquals(a - $noinline$IntShr(b, 31),  a - (b >> 31));
+    assertIntEquals(a - $noinline$IntShr(b, 32),  a - (b >> 32));
+    assertIntEquals(a - $noinline$IntShr(b, 62),  a - (b >> 62));
+    assertIntEquals(a - $noinline$IntShr(b, 63),  a - (b >> 63));
+
+    assertIntEquals(a ^ $noinline$IntUshr(b, 1),   a ^ (b >>>  1));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 6),   a ^ (b >>>  6));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 7),   a ^ (b >>>  7));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 8),   a ^ (b >>>  8));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 14),  a ^ (b >>> 14));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 15),  a ^ (b >>> 15));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 16),  a ^ (b >>> 16));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 30),  a ^ (b >>> 30));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 31),  a ^ (b >>> 31));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 32),  a ^ (b >>> 32));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 62),  a ^ (b >>> 62));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 63),  a ^ (b >>> 63));
+  }
+
+
+  static long $noinline$LongShl(long b, long c) {
+    if (doThrow) throw new Error();
+    return b << c;
+  }
+  static long $noinline$LongShr(long b, long c) {
+    if (doThrow) throw new Error();
+    return b >> c;
+  }
+  static long $noinline$LongUshr(long b, long c) {
+    if (doThrow) throw new Error();
+    return b >>> c;
+  }
+
+  // Each test line below should see one merge.
+  /// CHECK-START-ARM64: void Main.$opt$validateShiftLong(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateShiftLong(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Shl
+  /// CHECK-NOT:                        Shr
+  /// CHECK-NOT:                        UShr
+
+  public static void $opt$validateShiftLong(long a, long b) {
+    assertLongEquals(a + $noinline$LongShl(b, 1),   a + (b <<  1));
+    assertLongEquals(a + $noinline$LongShl(b, 6),   a + (b <<  6));
+    assertLongEquals(a + $noinline$LongShl(b, 7),   a + (b <<  7));
+    assertLongEquals(a + $noinline$LongShl(b, 8),   a + (b <<  8));
+    assertLongEquals(a + $noinline$LongShl(b, 14),  a + (b << 14));
+    assertLongEquals(a + $noinline$LongShl(b, 15),  a + (b << 15));
+    assertLongEquals(a + $noinline$LongShl(b, 16),  a + (b << 16));
+    assertLongEquals(a + $noinline$LongShl(b, 30),  a + (b << 30));
+    assertLongEquals(a + $noinline$LongShl(b, 31),  a + (b << 31));
+    assertLongEquals(a + $noinline$LongShl(b, 32),  a + (b << 32));
+    assertLongEquals(a + $noinline$LongShl(b, 62),  a + (b << 62));
+    assertLongEquals(a + $noinline$LongShl(b, 63),  a + (b << 63));
+
+    assertLongEquals(a - $noinline$LongShr(b, 1),   a - (b >>  1));
+    assertLongEquals(a - $noinline$LongShr(b, 6),   a - (b >>  6));
+    assertLongEquals(a - $noinline$LongShr(b, 7),   a - (b >>  7));
+    assertLongEquals(a - $noinline$LongShr(b, 8),   a - (b >>  8));
+    assertLongEquals(a - $noinline$LongShr(b, 14),  a - (b >> 14));
+    assertLongEquals(a - $noinline$LongShr(b, 15),  a - (b >> 15));
+    assertLongEquals(a - $noinline$LongShr(b, 16),  a - (b >> 16));
+    assertLongEquals(a - $noinline$LongShr(b, 30),  a - (b >> 30));
+    assertLongEquals(a - $noinline$LongShr(b, 31),  a - (b >> 31));
+    assertLongEquals(a - $noinline$LongShr(b, 32),  a - (b >> 32));
+    assertLongEquals(a - $noinline$LongShr(b, 62),  a - (b >> 62));
+    assertLongEquals(a - $noinline$LongShr(b, 63),  a - (b >> 63));
+
+    assertLongEquals(a ^ $noinline$LongUshr(b, 1),   a ^ (b >>>  1));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 6),   a ^ (b >>>  6));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 7),   a ^ (b >>>  7));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 8),   a ^ (b >>>  8));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 14),  a ^ (b >>> 14));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 15),  a ^ (b >>> 15));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 16),  a ^ (b >>> 16));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 30),  a ^ (b >>> 30));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 31),  a ^ (b >>> 31));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 32),  a ^ (b >>> 32));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 62),  a ^ (b >>> 62));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 63),  a ^ (b >>> 63));
+  }
+
+
+  public static void main(String[] args) {
+    assertLongEquals(10000L - 3L, $opt$noinline$translate(10000L, (byte)3));
+    assertLongEquals(-10000L - -3L, $opt$noinline$translate(-10000L, (byte)-3));
+
+    assertIntEquals(4096, $opt$noinline$sameInput(512));
+    assertIntEquals(-8192, $opt$noinline$sameInput(-1024));
+
+    assertIntEquals(((1 << 23) | 1), $opt$noinline$multipleUses(1));
+    assertIntEquals(((1 << 20) | 5), $opt$noinline$multipleUses(1 << 20));
+
+    long inputs[] = {
+      -((1L <<  7) - 1L), -((1L <<  7)), -((1L <<  7) + 1L),
+      -((1L << 15) - 1L), -((1L << 15)), -((1L << 15) + 1L),
+      -((1L << 16) - 1L), -((1L << 16)), -((1L << 16) + 1L),
+      -((1L << 31) - 1L), -((1L << 31)), -((1L << 31) + 1L),
+      -((1L << 32) - 1L), -((1L << 32)), -((1L << 32) + 1L),
+      -((1L << 63) - 1L), -((1L << 63)), -((1L << 63) + 1L),
+      -42L, -314L, -2718281828L, -0x123456789L, -0x987654321L,
+      -1L, -20L, -300L, -4000L, -50000L, -600000L, -7000000L, -80000000L,
+      0L,
+      1L, 20L, 300L, 4000L, 50000L, 600000L, 7000000L, 80000000L,
+      42L,  314L,  2718281828L,  0x123456789L,  0x987654321L,
+      (1L <<  7) - 1L, (1L <<  7), (1L <<  7) + 1L,
+      (1L <<  8) - 1L, (1L <<  8), (1L <<  8) + 1L,
+      (1L << 15) - 1L, (1L << 15), (1L << 15) + 1L,
+      (1L << 16) - 1L, (1L << 16), (1L << 16) + 1L,
+      (1L << 31) - 1L, (1L << 31), (1L << 31) + 1L,
+      (1L << 32) - 1L, (1L << 32), (1L << 32) + 1L,
+      (1L << 63) - 1L, (1L << 63), (1L << 63) + 1L,
+      Long.MIN_VALUE, Long.MAX_VALUE
+    };
+    for (int i = 0; i < inputs.length; i++) {
+      $opt$noinline$testNeg((int)inputs[i]);
+      for (int j = 0; j < inputs.length; j++) {
+        $opt$noinline$testAnd(inputs[i], inputs[j]);
+        $opt$noinline$testOr((int)inputs[i], (int)inputs[j]);
+        $opt$noinline$testXor(inputs[i], inputs[j]);
+
+        $opt$validateExtendByte(inputs[i], (byte)inputs[j]);
+        $opt$validateExtendChar(inputs[i], (char)inputs[j]);
+        $opt$validateExtendShort(inputs[i], (short)inputs[j]);
+        $opt$validateExtendInt(inputs[i], (int)inputs[j]);
+        $opt$validateExtendLong(inputs[i], inputs[j]);
+
+        $opt$validateShiftInt((int)inputs[i], (int)inputs[j]);
+        $opt$validateShiftLong(inputs[i], inputs[j]);
+      }
+    }
+
+  }
+}
diff --git a/test/551-implicit-null-checks/expected.txt b/test/551-implicit-null-checks/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/551-implicit-null-checks/expected.txt
diff --git a/test/551-implicit-null-checks/info.txt b/test/551-implicit-null-checks/info.txt
new file mode 100644
index 0000000..bdd066b
--- /dev/null
+++ b/test/551-implicit-null-checks/info.txt
@@ -0,0 +1 @@
+Test that implicit null checks are recorded correctly for longs.
\ No newline at end of file
diff --git a/test/551-implicit-null-checks/src/Main.java b/test/551-implicit-null-checks/src/Main.java
new file mode 100644
index 0000000..677e8d3
--- /dev/null
+++ b/test/551-implicit-null-checks/src/Main.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  private class Inner {
+    private long i1;
+  }
+  private Inner inst;
+
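+  // `inst` is never assigned, so each long field access below faults; the
+  // implicit null check machinery must map that fault back to a
+  // NullPointerException at the correct location.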
+  public static void main(String args[]) throws Exception {
+    Main m = new Main();
+    try {
+      m.$opt$noinline$testGetLong();
+    } catch (NullPointerException ex) {
+      // good
+    }
+    try {
+      m.$opt$noinline$testPutLong(778899112233L);
+    } catch (NullPointerException ex) {
+      // good
+    }
+  }
+
+  public void $opt$noinline$testGetLong() throws Exception {
+    long result = inst.i1;
+    throw new Exception();  // prevent inlining
+  }
+
+  public void $opt$noinline$testPutLong(long a) throws Exception {
+    inst.i1 = a;
+    throw new Exception();  // prevent inlining
+  }
+}
diff --git a/test/551-invoke-super/expected.txt b/test/551-invoke-super/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/551-invoke-super/expected.txt
diff --git a/test/551-invoke-super/info.txt b/test/551-invoke-super/info.txt
new file mode 100644
index 0000000..864ddfe
--- /dev/null
+++ b/test/551-invoke-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode when resolving to an abstract method.
diff --git a/test/551-invoke-super/smali/invokesuper.smali b/test/551-invoke-super/smali/invokesuper.smali
new file mode 100644
index 0000000..ad3c218
--- /dev/null
+++ b/test/551-invoke-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke-super on a non-superclass to force complex resolution.
+    invoke-super {v1}, LInvokeSuper;->returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/551-invoke-super/smali/superclass.smali b/test/551-invoke-super/smali/superclass.smali
new file mode 100644
index 0000000..47fbee7
--- /dev/null
+++ b/test/551-invoke-super/smali/superclass.smali
@@ -0,0 +1,26 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class abstract public LSuperClass;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method abstract public returnInt()I
+.end method
diff --git a/test/551-invoke-super/src/Main.java b/test/551-invoke-super/src/Main.java
new file mode 100644
index 0000000..3a30184
--- /dev/null
+++ b/test/551-invoke-super/src/Main.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
+    try {
+      Method m = c.getMethod("run");
+      m.invoke(c.newInstance(), new Object[0]);
+      throw new Error("Expected AbstractMethodError");
+    } catch (InvocationTargetException e) {
+      if (!(e.getCause() instanceof AbstractMethodError)) {
+        throw new Error("Expected AbstractMethodError");
+      }
+    }
+  }
+}
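(A note on the smali above: with ".registers 2" and one implicit parameter, p0 aliases v1, so "invoke-super {v1}" passes `this`.) The AbstractMethodError path exercised here cannot be written in Java source (javac rejects a super call to an abstract method), which is why the test is smali. The reflective expect-throws pattern used by Main.java recurs in the tests below; a standalone distillation of it follows (a sketch only; the helper name is illustrative and not part of the suite):

    import java.lang.reflect.InvocationTargetException;
    import java.lang.reflect.Method;

    // Hypothetical helper distilling the pattern used by Main.java above:
    // reflectively invoke a no-arg method and require that it throws a
    // specific linkage error. Names are illustrative only.
    class ExpectThrows {
      static void assertCause(Class<?> klass, String methodName,
                              Class<? extends Throwable> expected) throws Exception {
        Method m = klass.getMethod(methodName);
        try {
          m.invoke(klass.newInstance());
          throw new Error("Expected " + expected.getName());
        } catch (InvocationTargetException e) {
          if (!expected.isInstance(e.getCause())) {
            throw new Error("Expected " + expected.getName() + ", got " + e.getCause());
          }
        }
      }
    }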
diff --git a/test/552-checker-sharpening/expected.txt b/test/552-checker-sharpening/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/552-checker-sharpening/expected.txt
diff --git a/test/552-checker-sharpening/info.txt b/test/552-checker-sharpening/info.txt
new file mode 100644
index 0000000..c84539c
--- /dev/null
+++ b/test/552-checker-sharpening/info.txt
@@ -0,0 +1 @@
+Tests for sharpening.
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
new file mode 100644
index 0000000..d50edd8
--- /dev/null
+++ b/test/552-checker-sharpening/src/Main.java
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static boolean doThrow = false;
+
+  private static int $noinline$foo(int x) {
+    if (doThrow) { throw new Error(); }
+    return x;
+  }
+
+  /// CHECK-START: int Main.testSimple(int) sharpening (before)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_via_method
+
+  /// CHECK-START-ARM: int Main.testSimple(int) sharpening (after)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM64: int Main.testSimple(int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86: int Main.testSimple(int) sharpening (after)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86_64: int Main.testSimple(int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testSimple(int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-X86: int Main.testSimple(int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  public static int testSimple(int x) {
+    // This call should use PC-relative dex cache array load to retrieve the target method.
+    return $noinline$foo(x);
+  }
+
+  /// CHECK-START: int Main.testDiamond(boolean, int) sharpening (before)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_via_method
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM64: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86_64: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NEXT:           If
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           If
+
+  public static int testDiamond(boolean negate, int x) {
+    // These calls should use PC-relative dex cache array loads to retrieve the target method.
+    // PC-relative bases used by X86 and ARM should be pulled before the If.
+    if (negate) {
+      return $noinline$foo(-x);
+    } else {
+      return $noinline$foo(x);
+    }
+  }
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (before)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                InvokeStaticOrDirect
+  /// CHECK-NOT:            InvokeStaticOrDirect
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           Goto
+  /// CHECK:                begin_block
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (before)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                InvokeStaticOrDirect
+  /// CHECK-NOT:            InvokeStaticOrDirect
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           ArmDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+  /// CHECK:                begin_block
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  public static int testLoop(int[] array, int x) {
+    // PC-relative bases used by X86 and ARM should be pulled before the loop.
+    for (int i : array) {
+      x += $noinline$foo(i);
+    }
+    return x;
+  }
+
+  /// CHECK-START-X86: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_x86 (before)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                If
+  /// CHECK:                begin_block
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           Goto
+
+  /// CHECK-START-ARM: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_arm (before)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                If
+  /// CHECK:                begin_block
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           ArmDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+
+  public static int testLoopWithDiamond(int[] array, boolean negate, int x) {
+    // PC-relative bases used by X86 and ARM should be pulled before the loop
+    // but not outside the if.
+    if (array != null) {
+      for (int i : array) {
+        if (negate) {
+          x += $noinline$foo(-i);
+        } else {
+          x += $noinline$foo(i);
+        }
+      }
+    }
+    return x;
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(1, testSimple(1));
+    assertIntEquals(1, testDiamond(false, 1));
+    assertIntEquals(-1, testDiamond(true, 1));
+    assertIntEquals(3, testLoop(new int[]{ 2 }, 1));
+    assertIntEquals(8, testLoop(new int[]{ 3, 4 }, 1));
+    assertIntEquals(1, testLoopWithDiamond(null, false, 1));
+    assertIntEquals(3, testLoopWithDiamond(new int[]{ 2 }, false, 1));
+    assertIntEquals(-6, testLoopWithDiamond(new int[]{ 3, 4 }, true, 1));
+  }
+}
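The $noinline$ prefix plus the never-taken doThrow guard is the run-test idiom for keeping a callee out of the inliner, so the InvokeStaticOrDirect that the CHECK lines assert on actually survives into the sharpening pass. A minimal standalone sketch of the idiom (assuming only that the compiler declines to inline a method with a throwing path guarded this way, which is the convention these tests rely on):

    // Sketch of the no-inline idiom used by testSimple() and friends.
    public class NoInlineSketch {
      static boolean doThrow = false;  // never set; the branch exists only to block inlining

      static int $noinline$identity(int x) {
        if (doThrow) { throw new Error(); }
        return x;
      }

      public static void main(String[] args) {
        // The call below remains a real invoke in the optimized code,
        // which is what checker tests pattern-match against.
        System.out.println($noinline$identity(42));
      }
    }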
diff --git a/test/552-invoke-non-existent-super/expected.txt b/test/552-invoke-non-existent-super/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/552-invoke-non-existent-super/expected.txt
diff --git a/test/552-invoke-non-existent-super/info.txt b/test/552-invoke-non-existent-super/info.txt
new file mode 100644
index 0000000..c5428d4
--- /dev/null
+++ b/test/552-invoke-non-existent-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode when the super class does not have the method.
diff --git a/test/552-invoke-non-existent-super/smali/invokesuper.smali b/test/552-invoke-non-existent-super/smali/invokesuper.smali
new file mode 100644
index 0000000..ad3c218
--- /dev/null
+++ b/test/552-invoke-non-existent-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke-super whose method reference names this class (not the super class) to force complex resolution.
+    invoke-super {v1}, LInvokeSuper;->returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/552-invoke-non-existent-super/smali/superclass.smali b/test/552-invoke-non-existent-super/smali/superclass.smali
new file mode 100644
index 0000000..21d961e
--- /dev/null
+++ b/test/552-invoke-non-existent-super/smali/superclass.smali
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class abstract public LSuperClass;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
diff --git a/test/552-invoke-non-existent-super/src/Main.java b/test/552-invoke-non-existent-super/src/Main.java
new file mode 100644
index 0000000..c264471
--- /dev/null
+++ b/test/552-invoke-non-existent-super/src/Main.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
+    try {
+      Method m = c.getMethod("run");
+      m.invoke(c.newInstance(), new Object[0]);
+      throw new Error("Expected NoSuchMethodError");
+    } catch (InvocationTargetException e) {
+      if (!(e.getCause() instanceof NoSuchMethodError)) {
+        throw new Error("Expected NoSuchMethodError");
+      }
+    }
+  }
+}
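With the hypothetical ExpectThrows helper sketched after test 551, this Main.java would reduce to a single call; the only difference between the two tests is the expected error class:

    // Illustrative only; the real tests inline this logic.
    ExpectThrows.assertCause(Class.forName("InvokeSuper"), "run", NoSuchMethodError.class);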
diff --git a/test/553-invoke-super/expected.txt b/test/553-invoke-super/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/553-invoke-super/expected.txt
diff --git a/test/553-invoke-super/info.txt b/test/553-invoke-super/info.txt
new file mode 100644
index 0000000..ad99030
--- /dev/null
+++ b/test/553-invoke-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode.
diff --git a/test/553-invoke-super/smali/invokesuper.smali b/test/553-invoke-super/smali/invokesuper.smali
new file mode 100644
index 0000000..a6f9b4e
--- /dev/null
+++ b/test/553-invoke-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke-super that names this class, to confuse the runtime/compiler.
+    invoke-super {v1}, LInvokeSuper;->$noinline$returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public $noinline$returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/553-invoke-super/src/Main.java b/test/553-invoke-super/src/Main.java
new file mode 100644
index 0000000..91d2394
--- /dev/null
+++ b/test/553-invoke-super/src/Main.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  static void assertEquals(int expected, int value) {
+    if (expected != value) {
+      throw new Error("Expected " + expected + ", got " + value);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
+    Method m = c.getMethod("run");
+    assertEquals(42, ((Integer)m.invoke(c.newInstance(), new Object[0])).intValue());
+  }
+}
diff --git a/test/553-invoke-super/src/SuperClass.java b/test/553-invoke-super/src/SuperClass.java
new file mode 100644
index 0000000..36ce093
--- /dev/null
+++ b/test/553-invoke-super/src/SuperClass.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class SuperClass {
+  boolean doThrow = false;
+
+  public int $noinline$returnInt() {
+    if (doThrow) {
+      throw new Error();
+    }
+    return 42;
+  }
+}
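The smali run() in this test names InvokeSuper itself as the invoke-super target, a method reference javac never emits. The closest plain-Java analogue (a sketch for orientation, not part of the test) targets the superclass directly and shows why 42, not 777, is the expected answer:

    // What a javac-compiled equivalent of run() looks like: the super
    // call must resolve past the local override to SuperClass's 42.
    class InvokeSuperJava extends SuperClass {
      public int run() {
        return super.$noinline$returnInt();  // 42
      }

      public int $noinline$returnInt() {
        return 777;  // shadowed; unreachable through the super call
      }
    }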
diff --git a/test/555-UnsafeGetLong-regression/expected.txt b/test/555-UnsafeGetLong-regression/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/555-UnsafeGetLong-regression/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/555-UnsafeGetLong-regression/info.txt b/test/555-UnsafeGetLong-regression/info.txt
new file mode 100644
index 0000000..0e16ed7
--- /dev/null
+++ b/test/555-UnsafeGetLong-regression/info.txt
@@ -0,0 +1,2 @@
+Regression test for the sun.misc.Unsafe.getLong intrinsic, whose
+locations were not handled properly.
diff --git a/test/555-UnsafeGetLong-regression/src/Main.java b/test/555-UnsafeGetLong-regression/src/Main.java
new file mode 100644
index 0000000..1adafae
--- /dev/null
+++ b/test/555-UnsafeGetLong-regression/src/Main.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Field;
+import sun.misc.Unsafe;
+
+public class Main {
+  private static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static Unsafe getUnsafe() throws Exception {
+    Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
+    Field f = unsafeClass.getDeclaredField("theUnsafe");
+    f.setAccessible(true);
+    return (Unsafe) f.get(null);
+  }
+
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[0]);
+    Unsafe unsafe = getUnsafe();
+
+    testUnsafeGetLong(unsafe);
+  }
+
+  public static void testUnsafeGetLong(Unsafe unsafe) throws Exception {
+    TestClass test = new TestClass();
+    Field longField = TestClass.class.getDeclaredField("longVar");
+    long lvar = unsafe.objectFieldOffset(longField);
+    lvar = unsafe.getLong(test, lvar);
+    assertLongEquals(1122334455667788L, lvar);
+  }
+
+  private static class TestClass {
+    public long longVar = 1122334455667788L;
+  }
+}
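sun.misc.Unsafe pairs getLong(Object, long) with putLong(Object, long, long); a round-trip sketch building on the getUnsafe() accessor above (the putLong call illustrates the API and is not something this regression test exercises):

    // Store through the raw field offset, then read it back.
    public static void testUnsafeRoundTrip(Unsafe unsafe) throws Exception {
      TestClass test = new TestClass();
      long offset = unsafe.objectFieldOffset(TestClass.class.getDeclaredField("longVar"));
      unsafe.putLong(test, offset, 42L);                     // raw 64-bit store
      assertLongEquals(42L, unsafe.getLong(test, offset));   // raw 64-bit load
    }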
diff --git a/test/556-invoke-super/expected.txt b/test/556-invoke-super/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/556-invoke-super/expected.txt
diff --git a/test/556-invoke-super/info.txt b/test/556-invoke-super/info.txt
new file mode 100644
index 0000000..7de2a4f
--- /dev/null
+++ b/test/556-invoke-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode with multidex.
diff --git a/test/556-invoke-super/smali/invokesuper.smali b/test/556-invoke-super/smali/invokesuper.smali
new file mode 100644
index 0000000..ef55000
--- /dev/null
+++ b/test/556-invoke-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke-super that names this class, to confuse the runtime/compiler.
+    invoke-super {p0}, LInvokeSuper;->returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/556-invoke-super/src-multidex/SuperClass.java b/test/556-invoke-super/src-multidex/SuperClass.java
new file mode 100644
index 0000000..faf16c4
--- /dev/null
+++ b/test/556-invoke-super/src-multidex/SuperClass.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class SuperClass {
+  public int returnInt() {
+    return 42;
+  }
+}
diff --git a/test/556-invoke-super/src/Main.java b/test/556-invoke-super/src/Main.java
new file mode 100644
index 0000000..07289f7
--- /dev/null
+++ b/test/556-invoke-super/src/Main.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  static void assertEquals(int expected, int value) {
+    if (expected != value) {
+      throw new Error("Expected " + expected + ", got " + value);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
+    Method m = c.getMethod("run");
+    assertEquals(42, ((Integer)m.invoke(c.newInstance(), new Object[0])).intValue());
+  }
+}
diff --git a/test/557-checker-instruction-simplifier-ror/expected.txt b/test/557-checker-instruction-simplifier-ror/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/557-checker-instruction-simplifier-ror/expected.txt
diff --git a/test/557-checker-instruction-simplifier-ror/info.txt b/test/557-checker-instruction-simplifier-ror/info.txt
new file mode 100644
index 0000000..f9a86f8
--- /dev/null
+++ b/test/557-checker-instruction-simplifier-ror/info.txt
@@ -0,0 +1 @@
+Tests simplification of bitfield rotate patterns in the optimizing compiler.
diff --git a/test/557-checker-instruction-simplifier-ror/src/Main.java b/test/557-checker-instruction-simplifier-ror/src/Main.java
new file mode 100644
index 0000000..027f262
--- /dev/null
+++ b/test/557-checker-instruction-simplifier-ror/src/Main.java
@@ -0,0 +1,659 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Expected: " + expected + ", found: " + actual);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long actual) {
+    if (expected != actual) {
+      throw new Error("Expected: " + expected + ", found: " + actual);
+    }
+  }
+
+  /// CHECK-START: int Main.rotateIntegerRight(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Invoke:i\d+>>       InvokeStaticOrDirect intrinsic:IntegerRotateRight
+
+  /// CHECK-START: int Main.rotateIntegerRight(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.rotateIntegerRight(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      LoadClass
+  /// CHECK-NOT:      ClinitCheck
+  /// CHECK-NOT:      InvokeStaticOrDirect
+  public static int rotateIntegerRight(int value, int distance) {
+    return java.lang.Integer.rotateRight(value, distance);
+  }
+
+  /// CHECK-START: int Main.rotateIntegerLeft(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Invoke:i\d+>>       InvokeStaticOrDirect intrinsic:IntegerRotateLeft
+
+  /// CHECK-START: int Main.rotateIntegerLeft(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.rotateIntegerLeft(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      LoadClass
+  /// CHECK-NOT:      ClinitCheck
+  /// CHECK-NOT:      InvokeStaticOrDirect
+  public static int rotateIntegerLeft(int value, int distance) {
+    return java.lang.Integer.rotateLeft(value, distance);
+  }
+
+  /// CHECK-START: long Main.rotateLongRight(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Invoke:j\d+>>       InvokeStaticOrDirect intrinsic:LongRotateRight
+
+  /// CHECK-START: long Main.rotateLongRight(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rotateLongRight(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      LoadClass
+  /// CHECK-NOT:      ClinitCheck
+  /// CHECK-NOT:      InvokeStaticOrDirect
+  public static long rotateLongRight(long value, int distance) {
+    return java.lang.Long.rotateRight(value, distance);
+  }
+
+  /// CHECK-START: long Main.rotateLongLeft(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Invoke:j\d+>>       InvokeStaticOrDirect intrinsic:LongRotateLeft
+
+  /// CHECK-START: long Main.rotateLongLeft(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rotateLongLeft(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      LoadClass
+  /// CHECK-NOT:      ClinitCheck
+  /// CHECK-NOT:      InvokeStaticOrDirect
+  public static long rotateLongLeft(long value, int distance) {
+    return java.lang.Long.rotateLeft(value, distance);
+  }
+
+  //  (i >>> #distance) | (i << #(reg_bits - distance))
+
+  /// CHECK-START: int Main.ror_int_constant_c_c(int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Const30:i\d+>>      IntConstant 30
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<Const2>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Const30>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.ror_int_constant_c_c(int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Const2>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_constant_c_c(int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_constant_c_c(int value) {
+    return (value >>> 2) | (value << 30);
+  }
+
+  /// CHECK-START: int Main.ror_int_constant_c_c_0(int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Const2>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_constant_c_c_0(int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_constant_c_c_0(int value) {
+    return (value >>> 2) | (value << 62);
+  }
+
+  //  (j >>> #distance) | (j << #(reg_bits - distance))
+
+  /// CHECK-START: long Main.ror_long_constant_c_c(long) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Const62:i\d+>>      IntConstant 62
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Const2>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<Const62>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.ror_long_constant_c_c(long) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Const2>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.ror_long_constant_c_c(long) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static long ror_long_constant_c_c(long value) {
+    return (value >>> 2) | (value << 62);
+  }
+
+  /// CHECK-START: long Main.ror_long_constant_c_c_0(long) instruction_simplifier (after)
+  /// CHECK-NOT:      Ror
+  public static long ror_long_constant_c_c_0(long value) {
+    return (value >>> 2) | (value << 30);
+  }
+
+  //  (i >>> #distance) | (i << #-distance)
+
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<ConstNeg2:i\d+>>    IntConstant -2
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<Const2>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<ConstNeg2>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Const2>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_constant_c_negc(int value) {
+    return (value >>> 2) | (value << -2);
+  }
+
+  //  (j >>> #distance) | (j << #-distance)
+
+  /// CHECK-START: long Main.ror_long_constant_c_negc(long) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<ConstNeg2:i\d+>>    IntConstant -2
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Const2>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<ConstNeg2>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.ror_long_constant_c_negc(long) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<Const2:i\d+>>       IntConstant 2
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Const2>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.ror_long_constant_c_negc(long) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static long ror_long_constant_c_negc(long value) {
+    return (value >>> 2) | (value << -2);
+  }
+
+  //  (i >>> distance) | (i << (#reg_bits - distance))
+
+  /// CHECK-START: int Main.ror_int_reg_v_csubv(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Sub:i\d+>>          Sub [<<Const32>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Sub>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_csubv(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_csubv(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  /// CHECK-NOT:      Sub
+  public static int ror_int_reg_v_csubv(int value, int distance) {
+    return (value >>> distance) | (value << (32 - distance));
+  }
+
+  //  (distance = x - y)
+  //  (i >>> distance) | (i << (#reg_bits - distance))
+
+  /// CHECK-START: int Main.ror_int_subv_csubv(int, int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK-DAG:      <<Sub32:i\d+>>        Sub [<<Const32>>,<<SubDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Sub32>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<SubDistance>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.ror_int_subv_csubv(int, int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<SubDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_subv_csubv(int, int, int) instruction_simplifier (after)
+  /// CHECK:          Sub
+  /// CHECK-NOT:      Sub
+
+  /// CHECK-START: int Main.ror_int_subv_csubv(int, int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_subv_csubv(int value, int x, int y) {
+    int distance = x - y;
+    return (value >>> distance) | (value << (32 - distance));
+  }
+
+  /// CHECK-START: int Main.ror_int_subv_csubv_env(int, int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK-DAG:      <<Sub32:i\d+>>        Sub [<<Const32>>,<<SubDistance>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<SubDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Sub32>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:          <<Add:i\d+>>          Add [<<Or>>,<<Sub32>>]
+  /// CHECK:                                Return [<<Add>>]
+
+  /// CHECK-START: int Main.ror_int_subv_csubv_env(int, int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK-DAG:      <<Sub32:i\d+>>        Sub [<<Const32>>,<<SubDistance>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<SubDistance>>]
+  /// CHECK:          <<Add:i\d+>>          Add [<<Ror>>,<<Sub32>>]
+  /// CHECK:                                Return [<<Add>>]
+
+  /// CHECK-START: int Main.ror_int_subv_csubv_env(int, int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_subv_csubv_env(int value, int x, int y) {
+    int distance = x - y;
+    int bits_minus_dist = 32 - distance;
+    return ((value >>> distance) | (value << bits_minus_dist)) + bits_minus_dist;
+  }
+
+  //  (j >>> distance) | (j << (#reg_bits - distance))
+
+  /// CHECK-START: long Main.ror_long_reg_v_csubv(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const64:i\d+>>      IntConstant 64
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Sub:i\d+>>          Sub [<<Const64>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<Sub>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.ror_long_reg_v_csubv(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.ror_long_reg_v_csubv(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  /// CHECK-NOT:      Sub
+  public static long ror_long_reg_v_csubv(long value, int distance) {
+    return (value >>> distance) | (value << (64 - distance));
+  }
+
+  /// CHECK-START: long Main.ror_long_reg_v_csubv_0(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      Ror
+  public static long ror_long_reg_v_csubv_0(long value, int distance) {
+    return (value >>> distance) | (value << (32 - distance));
+  }
+
+  /// CHECK-START: long Main.ror_long_subv_csubv_0(long, int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      Ror
+  public static long ror_long_subv_csubv_0(long value, int x, int y) {
+    int distance = x - y;
+    return (value >>> distance) | (value << (32 - distance));
+  }
+
+  //  (i >>> (#reg_bits - distance)) | (i << distance)
+
+  /// CHECK-START: int Main.rol_int_reg_csubv_v(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<Sub:i\d+>>          Sub [<<Const32>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<Sub>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.rol_int_reg_csubv_v(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK:          <<Sub:i\d+>>          Sub [<<Const32>>,<<ArgDistance>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Sub>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.rol_int_reg_csubv_v(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int rol_int_reg_csubv_v(int value, int distance) {
+    return (value >>> (32 - distance)) | (value << distance);
+  }
+
+  //  (distance = x - y)
+  //  (i >>> (#reg_bits - distance)) | (i << distance)
+
+  /// CHECK-START: int Main.rol_int_csubv_subv(int, int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK-DAG:      <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK-DAG:      <<Sub32:i\d+>>        Sub [<<Const32>>,<<SubDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<SubDistance>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<Sub32>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.rol_int_csubv_subv(int, int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgX:i\d+>>         ParameterValue
+  /// CHECK:          <<ArgY:i\d+>>         ParameterValue
+  /// CHECK:          <<Const32:i\d+>>      IntConstant 32
+  /// CHECK:          <<SubDistance:i\d+>>  Sub [<<ArgX>>,<<ArgY>>]
+  /// CHECK:          <<Sub:i\d+>>          Sub [<<Const32>>,<<SubDistance>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Sub>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.rol_int_csubv_subv(int, int, int) instruction_simplifier (after)
+  /// CHECK:          Sub
+  /// CHECK:          Sub
+
+  /// CHECK-START: int Main.rol_int_csubv_subv(int, int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int rol_int_csubv_subv(int value, int x, int y) {
+    int distance = x - y;
+    return (value >>> (32 - distance)) | (value << distance);
+  }
+
+  //  (j >>> (#reg_bits - distance)) | (j << distance)
+
+  /// CHECK-START: long Main.rol_long_reg_csubv_v(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const64:i\d+>>      IntConstant 64
+  /// CHECK-DAG:      <<Sub:i\d+>>          Sub [<<Const64>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Sub>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.rol_long_reg_csubv_v(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Const64:i\d+>>      IntConstant 64
+  /// CHECK:          <<Sub:i\d+>>          Sub [<<Const64>>,<<ArgDistance>>]
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Sub>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rol_long_reg_csubv_v(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static long rol_long_reg_csubv_v(long value, int distance) {
+    return (value >>> (64 - distance)) | (value << distance);
+  }
+
+  /// CHECK-START: long Main.rol_long_reg_csubv_v_0(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      Ror
+  public static long rol_long_reg_csubv_v_0(long value, int distance) {
+    return (value >>> (32 - distance)) | (value << distance);
+  }
+
+  //  (i >>> distance) | (i << -distance) (i.e. libcore's Integer.rotateRight)
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Neg>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  /// CHECK-NOT:      Neg
+  public static int ror_int_reg_v_negv(int value, int distance) {
+    return (value >>> distance) | (value << -distance);
+  }
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv_env(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<Neg>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:          <<Add:i\d+>>          Add [<<Or>>,<<Neg>>]
+  /// CHECK:                                Return [<<Add>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv_env(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Sub:i\d+>>          Sub [<<Ror>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Sub>>]
+
+  /// CHECK-START: int Main.ror_int_reg_v_negv_env(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int ror_int_reg_v_negv_env(int value, int distance) {
+    int neg_distance = -distance;
+    return ((value >>> distance) | (value << neg_distance)) + neg_distance;
+  }
+
+  //  (j >>> distance) | (j << -distance) (i.e. libcore's Long.rotateRight)
+
+  /// CHECK-START: long Main.ror_long_reg_v_negv(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<Neg>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<UShr>>,<<Shl>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.ror_long_reg_v_negv(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.ror_long_reg_v_negv(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  /// CHECK-NOT:      Neg
+  public static long ror_long_reg_v_negv(long value, int distance) {
+    return (value >>> distance) | (value << -distance);
+  }
+
+  //  (i << distance) | (i >>> -distance) (i.e. libcore's Integer.rotateLeft)
+
+  /// CHECK-START: int Main.rol_int_reg_negv_v(int, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>         UShr [<<ArgValue>>,<<Neg>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Or:i\d+>>           Or [<<Shl>>,<<UShr>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: int Main.rol_int_reg_negv_v(int, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: int Main.rol_int_reg_negv_v(int, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static int rol_int_reg_negv_v(int value, int distance) {
+    return (value << distance) | (value >>> -distance);
+  }
+
+  //  (j << distance) | (j >>> -distance) (i.e. libcore's Long.rotateLeft)
+
+  /// CHECK-START: long Main.rol_long_reg_negv_v(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Neg>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Or:j\d+>>           Or [<<Shl>>,<<UShr>>]
+  /// CHECK:                                Return [<<Or>>]
+
+  /// CHECK-START: long Main.rol_long_reg_negv_v(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rol_long_reg_negv_v(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:      UShr
+  /// CHECK-NOT:      Shl
+  public static long rol_long_reg_negv_v(long value, int distance) {
+    return (value << distance) | (value >>> -distance);
+  }
+
+  //  (j << distance) + (j >>> -distance)
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_add(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Neg>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Add:j\d+>>          Add [<<Shl>>,<<UShr>>]
+  /// CHECK:                                Return [<<Add>>]
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_add(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_add(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:  Add
+  /// CHECK-NOT:  Shl
+  /// CHECK-NOT:  UShr
+  public static long rol_long_reg_v_negv_add(long value, int distance) {
+    return (value << distance) + (value >>> -distance);
+  }
+
+  //  (j << distance) ^ (j >>> -distance)
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_xor(long, int) instruction_simplifier (before)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK-DAG:      <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK-DAG:      <<UShr:j\d+>>         UShr [<<ArgValue>>,<<Neg>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>          Shl [<<ArgValue>>,<<ArgDistance>>]
+  /// CHECK:          <<Xor:j\d+>>          Xor [<<Shl>>,<<UShr>>]
+  /// CHECK:                                Return [<<Xor>>]
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_xor(long, int) instruction_simplifier (after)
+  /// CHECK:          <<ArgValue:j\d+>>     ParameterValue
+  /// CHECK:          <<ArgDistance:i\d+>>  ParameterValue
+  /// CHECK:          <<Neg:i\d+>>          Neg [<<ArgDistance>>]
+  /// CHECK:          <<Ror:j\d+>>          Ror [<<ArgValue>>,<<Neg>>]
+  /// CHECK:                                Return [<<Ror>>]
+
+  /// CHECK-START: long Main.rol_long_reg_v_negv_xor(long, int) instruction_simplifier (after)
+  /// CHECK-NOT:  Xor
+  /// CHECK-NOT:  Shl
+  /// CHECK-NOT:  UShr
+  public static long rol_long_reg_v_negv_xor(long value, int distance) {
+    return (value << distance) ^ (value >>> -distance);
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(2, ror_int_constant_c_c(8));
+    assertIntEquals(2, ror_int_constant_c_c_0(8));
+    assertLongEquals(2L, ror_long_constant_c_c(8L));
+
+    assertIntEquals(2, ror_int_constant_c_negc(8));
+    assertLongEquals(2L, ror_long_constant_c_negc(8L));
+
+    assertIntEquals(2, ror_int_reg_v_csubv(8, 2));
+    assertLongEquals(2L, ror_long_reg_v_csubv(8L, 2));
+
+    assertIntEquals(2, ror_int_subv_csubv(8, 2, 0));
+    assertIntEquals(32, ror_int_subv_csubv_env(8, 2, 0));
+    assertIntEquals(32, rol_int_csubv_subv(8, 2, 0));
+
+    assertIntEquals(32, rol_int_reg_csubv_v(8, 2));
+    assertLongEquals(32L, rol_long_reg_csubv_v(8L, 2));
+
+    assertIntEquals(2, ror_int_reg_v_negv(8, 2));
+    assertIntEquals(0, ror_int_reg_v_negv_env(8, 2));
+    assertLongEquals(2L, ror_long_reg_v_negv(8L, 2));
+
+    assertIntEquals(32, rol_int_reg_negv_v(8, 2));
+    assertLongEquals(32L, rol_long_reg_negv_v(8L, 2));
+
+    assertLongEquals(32L, rol_long_reg_v_negv_add(8L, 2));
+    assertLongEquals(32L, rol_long_reg_v_negv_xor(8L, 2));
+  }
+}
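
A minimal standalone sketch (not part of the change above; the class name RotatePatternDemo is
invented for illustration) of why these patterns are rotations: Java masks shift distances to the
low 5 bits for int and the low 6 bits for long, so (v >>> -d) is the same as (v >>> (32 - d)) for
an int. For distances that are not a multiple of the type width, the two shifted halves occupy
disjoint bits, which is also why the Add and Xor variants above coincide with Or there.

    public class RotatePatternDemo {
      // Same bit pattern as Integer.rotateLeft(v, d): the shift distance is
      // masked, so -d behaves as (32 - d) modulo 32.
      static int rolInt(int v, int d) {
        return (v << d) | (v >>> -d);
      }

      // Same bit pattern as Long.rotateLeft(v, d), with the distance masked to
      // the low 6 bits.
      static long rolLong(long v, int d) {
        return (v << d) | (v >>> -d);
      }

      public static void main(String[] args) {
        for (int d = -70; d <= 70; d++) {
          if (rolInt(0x12345678, d) != Integer.rotateLeft(0x12345678, d)
              || rolLong(0x123456789ABCDEF0L, d) != Long.rotateLeft(0x123456789ABCDEF0L, d)) {
            throw new AssertionError("mismatch at distance " + d);
          }
        }
        System.out.println("rotate patterns match Integer/Long.rotateLeft");
      }
    }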
diff --git a/test/558-switch/expected.txt b/test/558-switch/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/558-switch/expected.txt
diff --git a/test/558-switch/info.txt b/test/558-switch/info.txt
new file mode 100644
index 0000000..07283ff
--- /dev/null
+++ b/test/558-switch/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler, which used to generate
+invalid code for ARM.
diff --git a/test/558-switch/src/Main.java b/test/558-switch/src/Main.java
new file mode 100644
index 0000000..f44231e
--- /dev/null
+++ b/test/558-switch/src/Main.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static boolean testMethod(int statusCode) {
+    switch (statusCode) {
+      case 303:
+      case 301:
+      case 302:
+      case 307:
+        return true;
+      default:
+        return false;
+    }  // end of switch
+  }
+
+  public static void main(String[] args) {
+    if (!testMethod(301)) {
+      throw new Error("Unexpected result");
+    }
+  }
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index a590cf1..27f5b5d 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -47,4 +47,6 @@
 b/23502994 (if-eqz)
 b/23502994 (check-cast)
 b/25494456
+b/21869691
+b/26143249
 Done!
diff --git a/test/800-smali/smali/b_21869691A.smali b/test/800-smali/smali/b_21869691A.smali
new file mode 100644
index 0000000..a7a6ef4
--- /dev/null
+++ b/test/800-smali/smali/b_21869691A.smali
@@ -0,0 +1,47 @@
+# Test that the verifier does not incorrectly stash methods in the dex cache when they are
+# invoked with the wrong opcode.
+#
+# When using invoke-interface on a method id that is not from an interface class, we should throw
+# an IncompatibleClassChangeError. FindInterfaceMethod assumes that the given type is an interface,
+# so we can construct a class hierarchy that would have a surprising result:
+#
+#   interface I {
+#     void a();
+#   }
+#
+#   class B implements I {
+#      // Miranda method for a, or an implementation of a.
+#   }
+#
+#   class C extends B {
+#   }
+#
+# Then, without an explicit check, calling invoke-interface C.a() will go wrong: a can't be found
+# in C itself, but it is in the interface table, so we find an interface method and pass the ICCE checks.
+#
+# If we do this before a correct invoke-virtual C.a(), we poison the dex cache with an incorrect
+# method. In this test, A does so (A sorts before B, so it is processed first); the "real" call is in B.
+
+.class public LB21869691A;
+
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public run()V
+  .registers 3
+  new-instance v0, LB21869691C;
+  invoke-direct {v0}, LB21869691C;-><init>()V
+  invoke-virtual {v2, v0}, LB21869691A;->callinf(LB21869691C;)V
+  return-void
+.end method
+
+.method public callinf(LB21869691C;)V
+  .registers 2
+  invoke-interface {p1}, LB21869691C;->a()V
+  return-void
+.end method
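
For readability, here is the Java-level shape of the hierarchy that the four b/21869691 smali
files encode (illustration only: the offending invoke-interface on a non-interface method id
cannot be written in Java source, which is why the test needs smali):

    interface I {                      // LB21869691I;
      void a();
    }

    abstract class B implements I {    // LB21869691B;
      public void a() {}               // implementation of the interface method
    }

    class C extends B {}               // LB21869691C;

    // A (LB21869691A;) then performs the equivalent of invoke-interface C.a()
    // even though C is not an interface, which must raise
    // IncompatibleClassChangeError, before B makes the correct invoke-virtual
    // call.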
diff --git a/test/800-smali/smali/b_21869691B.smali b/test/800-smali/smali/b_21869691B.smali
new file mode 100644
index 0000000..1172bdb
--- /dev/null
+++ b/test/800-smali/smali/b_21869691B.smali
@@ -0,0 +1,33 @@
+# Test that the verifier does not incorrectly stash methods in the dex cache when they are
+# invoked with the wrong opcode. See b_21869691A.smali for an explanation.
+
+.class public abstract LB21869691B;
+
+.super Ljava/lang/Object;
+.implements LB21869691I;
+
+.method protected constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+# Have an implementation for the interface method.
+.method public a()V
+  .registers 1
+  return-void
+.end method
+
+# Call ourself with invoke-virtual.
+.method public callB()V
+  .registers 1
+  invoke-virtual {p0}, LB21869691B;->a()V
+  return-void
+.end method
+
+# Call C with invoke-virtual.
+.method public callB(LB21869691C;)V
+  .registers 2
+  invoke-virtual {p1}, LB21869691C;->a()V
+  return-void
+.end method
diff --git a/test/800-smali/smali/b_21869691C.smali b/test/800-smali/smali/b_21869691C.smali
new file mode 100644
index 0000000..4f89a04
--- /dev/null
+++ b/test/800-smali/smali/b_21869691C.smali
@@ -0,0 +1,12 @@
+# Test that the verifier does not incorrectly stash methods in the dex cache when they are
+# invoked with the wrong opcode. See b_21869691A.smali for an explanation.
+
+.class public LB21869691C;
+
+.super LB21869691B;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, LB21869691B;-><init>()V
+    return-void
+.end method
diff --git a/test/800-smali/smali/b_21869691I.smali b/test/800-smali/smali/b_21869691I.smali
new file mode 100644
index 0000000..72a27dd
--- /dev/null
+++ b/test/800-smali/smali/b_21869691I.smali
@@ -0,0 +1,11 @@
+# Test that the verifier does not incorrectly stash methods in the dex cache when they are
+# invoked with the wrong opcode.
+#
+# This is the interface class that has an "a" method.
+
+.class public abstract interface LB21869691I;
+
+.super Ljava/lang/Object;
+
+.method public abstract a()V
+.end method
diff --git a/test/800-smali/smali/b_26143249.smali b/test/800-smali/smali/b_26143249.smali
new file mode 100644
index 0000000..aa69e84
--- /dev/null
+++ b/test/800-smali/smali/b_26143249.smali
@@ -0,0 +1,20 @@
+# Make sure we accept non-abstract classes with abstract members.
+
+.class public LB26143249;
+
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public run()V
+    .registers 1
+    invoke-virtual {p0}, LB26143249;->abs()V
+    return-void
+.end method
+
+.method public abstract abs()V
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index 4844848..cc3b0b4 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -139,6 +139,10 @@
                 new Object[] { "abc" }, null, null));
         testCases.add(new TestCase("b/25494456", "B25494456", "run", null, new VerifyError(),
                 null));
+        testCases.add(new TestCase("b/21869691", "B21869691A", "run", null,
+                new IncompatibleClassChangeError(), null));
+        testCases.add(new TestCase("b/26143249", "B26143249", "run", null,
+                new AbstractMethodError(), null));
     }
 
     public void runTests() {
@@ -208,7 +212,7 @@
                                                         tc.expectedException.getClass().getName() +
                                                         ", but got " + exc.getClass(), exc);
             } else {
-              // Expected exception, do nothing.
+                // Expected exception, do nothing.
             }
         } finally {
             if (errorReturn != null) {
diff --git a/test/960-default-smali/build b/test/960-default-smali/build
index 4dc848c..b72afcd 100755
--- a/test/960-default-smali/build
+++ b/test/960-default-smali/build
@@ -22,7 +22,7 @@
 
 # Should we compile with Java source code? By default we will use Smali.
 USES_JAVA_SOURCE="false"
-if [[ $ARGS == *"--jvm"* ]]; then
+if [[ $@ == *"--jvm"* ]]; then
   USES_JAVA_SOURCE="true"
 elif [[ "$USE_JACK" == "true" ]]; then
   if $JACK -D jack.java.source.version=1.8 >& /dev/null; then
diff --git a/test/961-default-iface-resolution-generated/build b/test/961-default-iface-resolution-generated/build
index b4ced3e..005f76c 100755
--- a/test/961-default-iface-resolution-generated/build
+++ b/test/961-default-iface-resolution-generated/build
@@ -33,7 +33,7 @@
 
 # Should we compile with Java source code. By default we will use Smali.
 USES_JAVA_SOURCE="false"
-if [[ $ARGS == *"--jvm"* ]]; then
+if [[ $@ == *"--jvm"* ]]; then
   USES_JAVA_SOURCE="true"
 elif [[ $USE_JACK == "true" ]]; then
   if "$JACK" -D jack.java.source.version=1.8 >& /dev/null; then
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index 7a22e1b..f74a516 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -37,7 +37,8 @@
   457-regs/regs_jni.cc \
   461-get-reference-vreg/get_reference_vreg_jni.cc \
   466-get-live-vreg/get_live_vreg_jni.cc \
-  497-inlining-and-class-loader/clear_dex_cache.cc
+  497-inlining-and-class-loader/clear_dex_cache.cc \
+  543-env-long-ref/env_long_ref.cc
 
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index c830ad4..afd833e 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -38,11 +38,13 @@
   $(HOST_OUT_EXECUTABLES)/jasmin \
   $(HOST_OUT_EXECUTABLES)/smali \
   $(HOST_OUT_EXECUTABLES)/dexmerger
+TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES :=
 
 ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
   TEST_ART_RUN_TEST_DEPENDENCIES += \
     $(JACK) \
     $(JILL_JAR)
+  TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES += setup-jack-server
 endif
 
 ifeq ($(ART_TEST_DEBUG_GC),true)
@@ -63,7 +65,7 @@
     run_test_options += --quiet
   endif
 $$(dmart_target): PRIVATE_RUN_TEST_OPTIONS := $$(run_test_options)
-$$(dmart_target): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TARGET_JACK_CLASSPATH_DEPENDENCIES)
+$$(dmart_target): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TARGET_JACK_CLASSPATH_DEPENDENCIES) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
 	$(hide) rm -rf $$(dir $$@) && mkdir -p $$(dir $$@)
 	$(hide) DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
 	  SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
@@ -258,8 +260,10 @@
 
 TEST_ART_BROKEN_PREBUILD_RUN_TESTS :=
 
+# 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save.
 TEST_ART_BROKEN_NO_PREBUILD_TESTS := \
-  117-nopatchoat
+  117-nopatchoat \
+  554-jit-profile-file
 
 ifneq (,$(filter no-prebuild,$(PREBUILD_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),no-prebuild, \
@@ -274,7 +278,8 @@
 TEST_ART_BROKEN_NO_RELOCATE_TESTS := \
   117-nopatchoat \
   118-noimage-dex2oat \
-  119-noimage-patchoat
+  119-noimage-patchoat \
+  554-jit-profile-file
 
 ifneq (,$(filter no-relocate,$(RELOCATE_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -296,6 +301,7 @@
   412-new-array \
   471-uninitialized-locals \
   506-verify-aput \
+  554-jit-profile-file \
   800-smali
 
 ifneq (,$(filter interp-ac,$(COMPILER_TYPES)))
@@ -354,13 +360,15 @@
 # All these tests check that we have sane behavior if we don't have a patchoat or dex2oat.
 # Therefore we shouldn't run them in situations where we actually don't have these since they
 # explicitly test for them. These all also assume we have an image.
+# 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save.
 TEST_ART_BROKEN_FALLBACK_RUN_TESTS := \
   116-nodex2oat \
   117-nopatchoat \
   118-noimage-dex2oat \
   119-noimage-patchoat \
   137-cfi \
-  138-duplicate-classes-check2
+  138-duplicate-classes-check2 \
+  554-jit-profile-file
 
 # This test fails without an image.
 TEST_ART_BROKEN_NO_IMAGE_RUN_TESTS := \
@@ -411,7 +419,8 @@
 # Known broken tests for the interpreter.
 # CFI unwinding expects managed frames.
 TEST_ART_BROKEN_INTERPRETER_RUN_TESTS := \
-  137-cfi
+  137-cfi \
+  554-jit-profile-file
 
 ifneq (,$(filter interpreter,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -452,6 +461,7 @@
     441-checker-inliner \
     510-checker-try-catch \
     536-checker-intrinsic-optimization \
+    557-checker-instruction-simplifier-ror \
 
 ifeq (mips,$(TARGET_ARCH))
   ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
@@ -464,6 +474,21 @@
 
 TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS :=
 
+# Known broken tests for the mips64 optimizing compiler backend.
+TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS := \
+    557-checker-instruction-simplifier-ror \
+
+ifeq (mips64,$(TARGET_ARCH))
+  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
+        optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  endif
+endif
+
+TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS :=
+
 # Tests that should fail when the optimizing compiler compiles them non-debuggable.
 TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS := \
   454-get-vreg \
@@ -488,37 +513,54 @@
 
 TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS :=
 
-# Tests that should fail in the read barrier configuration.
-# 055: Exceeds run time limits due to read barrier instrumentation.
-# 137: Read barrier forces interpreter. Cannot run this with the interpreter.
-# 537: Expects an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet
-#      handled in the read barrier configuration.
-TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS := \
-  055-enum-performance                    \
-  137-cfi                                 \
+
+# Tests that should fail in the read barrier configuration with the default (Quick) compiler.
+# 137: Quick has no support for read barriers and punts to the
+#      interpreter, but CFI unwinding expects managed frames.
+# 554: Quick does not support JIT profiling.
+TEST_ART_BROKEN_DEFAULT_READ_BARRIER_RUN_TESTS := \
+  137-cfi \
+  554-jit-profile-file
+
+# Tests that should fail in the read barrier configuration with the Optimizing compiler.
+# 484: Baker's fast path based read barrier compiler instrumentation generates code containing
+#      more parallel moves on x86, thus some Checker assertions may fail.
+# 537: Expects an array copy to be intrinsified on x86-64, but calling-on-slowpath intrinsics are
+#      not yet handled in the read barrier configuration.
+TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := \
+  484-checker-register-hints \
   537-checker-arraycopy
 
 ifeq ($(ART_USE_READ_BARRIER),true)
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-      $(PREBUILD_TYPES),$(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
-      $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
-      $(TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  ifneq (,$(filter default,$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+        $(PREBUILD_TYPES),default,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
+        $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(TEST_ART_BROKEN_DEFAULT_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  endif
+
+  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+        $(PREBUILD_TYPES),optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
+        $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  endif
 endif
 
-TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS :=
+TEST_ART_BROKEN_DEFAULT_READ_BARRIER_RUN_TESTS :=
+TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS :=
 
 # Tests that should fail in the heap poisoning configuration with the default (Quick) compiler.
-# 137: Quick punts to the interpreter, and this test cannot run this with the interpreter.
+# 137: Quick has no support for read barriers and punts to the
+#      interpreter, but CFI unwinding expects managed frames.
+# 554: Quick does not support JIT profiling.
 TEST_ART_BROKEN_DEFAULT_HEAP_POISONING_RUN_TESTS := \
-  137-cfi
+  137-cfi \
+  554-jit-profile-file
 # Tests that should fail in the heap poisoning configuration with the Optimizing compiler.
-# 055-enum-performance: Exceeds run time limits due to heap poisoning instrumentation.
+# 055: Exceeds run time limits due to heap poisoning instrumentation (on ARM and ARM64 devices).
 TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS := \
   055-enum-performance
-# Tests that should fail in the heap poisoning configuration with the interpreter.
-# 137: Cannot run this with the interpreter.
-TEST_ART_BROKEN_INTERPRETER_HEAP_POISONING_RUN_TESTS := \
-  137-cfi
 
 ifeq ($(ART_HEAP_POISONING),true)
   ifneq (,$(filter default,$(COMPILER_TYPES)))
@@ -534,18 +576,10 @@
         $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
         $(TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
   endif
-
-  ifneq (,$(filter interpreter,$(COMPILER_TYPES)))
-    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-        $(PREBUILD_TYPES),interpreter,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
-        $(TEST_ART_BROKEN_INTERPRETER_HEAP_POISONING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-  endif
 endif
 
-TEST_ART_BROKEN_INTERPRETER_HEAP_POISONING_RUN_TESTS :=
-TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS :=
 TEST_ART_BROKEN_DEFAULT_HEAP_POISONING_RUN_TESTS :=
+TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS :=
 
 # Clear variables ahead of appending to them when defining tests.
 $(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=))
@@ -608,14 +642,16 @@
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) \
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libarttestd$(ART_HOST_SHLIB_EXTENSION) \
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libnativebridgetest$(ART_HOST_SHLIB_EXTENSION) \
-  $(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$(ART_HOST_SHLIB_EXTENSION)
 
 ifneq ($(HOST_PREFER_32_BIT),true)
 ART_TEST_HOST_RUN_TEST_DEPENDENCIES += \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libarttestd$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libnativebridgetest$(ART_HOST_SHLIB_EXTENSION) \
-  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$(ART_HOST_SHLIB_EXTENSION)
 endif
 
 # Create a rule to build and run a test following the form:
@@ -864,7 +900,7 @@
 $$(run_test_rule_name): PRIVATE_RUN_TEST_OPTIONS := $$(run_test_options)
 $$(run_test_rule_name): PRIVATE_JACK_CLASSPATH := $$(jack_classpath)
 .PHONY: $$(run_test_rule_name)
-$$(run_test_rule_name): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(HOST_OUT_EXECUTABLES)/hprof-conv $$(prereq_rule)
+$$(run_test_rule_name): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(HOST_OUT_EXECUTABLES)/hprof-conv $$(prereq_rule) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
 	$(hide) $$(call ART_TEST_SKIP,$$@) && \
 	  DX=$(abspath $(DX)) \
 	    JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
diff --git a/test/dexdump/bytecodes.txt b/test/dexdump/bytecodes.txt
index d14c47c..4c8b79b 100755
--- a/test/dexdump/bytecodes.txt
+++ b/test/dexdump/bytecodes.txt
@@ -196,6 +196,7 @@
       name          : 'icon'
       type          : 'I'
       access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 2130837504
   Instance fields   -
   Direct methods    -
     #0              : (in Lcom/google/android/test/R$drawable;)
diff --git a/test/dexdump/bytecodes.xml b/test/dexdump/bytecodes.xml
index 0581677..d08c2e9 100755
--- a/test/dexdump/bytecodes.xml
+++ b/test/dexdump/bytecodes.xml
@@ -3,6 +3,7 @@
 >
 <class name="SuppressLint"
  extends="java.lang.Object"
+ interface="true"
  abstract="true"
  static="false"
  final="false"
@@ -23,6 +24,7 @@
 </class>
 <class name="TargetApi"
  extends="java.lang.Object"
+ interface="true"
  abstract="true"
  static="false"
  final="false"
@@ -46,6 +48,7 @@
 >
 <class name="BuildConfig"
  extends="java.lang.Object"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
@@ -70,6 +73,7 @@
 </class>
 <class name="R.attr"
  extends="java.lang.Object"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
@@ -85,6 +89,7 @@
 </class>
 <class name="R.drawable"
  extends="java.lang.Object"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
@@ -97,6 +102,7 @@
  static="true"
  final="true"
  visibility="public"
+ value="2130837504"
 >
 </field>
 <constructor name="R.drawable"
@@ -109,6 +115,7 @@
 </class>
 <class name="R"
  extends="java.lang.Object"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
@@ -124,6 +131,7 @@
 </class>
 <class name="Test"
  extends="android.app.Activity"
+ interface="false"
  abstract="false"
  static="false"
  final="false"
diff --git a/test/dexdump/checkers.xml b/test/dexdump/checkers.xml
index 232254f..4e56ea2 100755
--- a/test/dexdump/checkers.xml
+++ b/test/dexdump/checkers.xml
@@ -3,6 +3,7 @@
 >
 <class name="Checkers"
  extends="android.app.Activity"
+ interface="false"
  abstract="false"
  static="false"
  final="false"
@@ -112,6 +113,7 @@
 </class>
 <class name="CheckersView"
  extends="android.view.View"
+ interface="false"
  abstract="false"
  static="false"
  final="false"
@@ -331,6 +333,7 @@
 </class>
 <class name="a"
  extends="java.lang.Thread"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
@@ -500,6 +503,7 @@
 </class>
 <class name="g"
  extends="java.lang.Object"
+ interface="false"
  abstract="false"
  static="false"
  final="true"
diff --git a/test/dexdump/staticfields.dex b/test/dexdump/staticfields.dex
new file mode 100644
index 0000000..a07c46e
--- /dev/null
+++ b/test/dexdump/staticfields.dex
Binary files differ
diff --git a/test/dexdump/staticfields.lst b/test/dexdump/staticfields.lst
new file mode 100644
index 0000000..5375b8e
--- /dev/null
+++ b/test/dexdump/staticfields.lst
@@ -0,0 +1,2 @@
+#staticfields.dex
+0x000001bc 8 StaticFields <init> ()V StaticFields.java 24
diff --git a/test/dexdump/staticfields.txt b/test/dexdump/staticfields.txt
new file mode 100644
index 0000000..022605f
--- /dev/null
+++ b/test/dexdump/staticfields.txt
@@ -0,0 +1,126 @@
+Processing 'staticfields.dex'...
+Opened 'staticfields.dex', DEX version '035'
+DEX file header:
+magic               : 'dex\n035\0'
+checksum            : 52d4fc6d
+signature           : 6e82...2f27
+file_size           : 1264
+header_size         : 112
+link_size           : 0
+link_off            : 0 (0x000000)
+string_ids_size     : 28
+string_ids_off      : 112 (0x000070)
+type_ids_size       : 12
+type_ids_off        : 224 (0x0000e0)
+proto_ids_size      : 1
+proto_ids_off       : 272 (0x000110)
+field_ids_size      : 12
+field_ids_off       : 284 (0x00011c)
+method_ids_size     : 2
+method_ids_off      : 380 (0x00017c)
+class_defs_size     : 1
+class_defs_off      : 396 (0x00018c)
+data_size           : 836
+data_off            : 428 (0x0001ac)
+
+Class #0 header:
+class_idx           : 6
+access_flags        : 1 (0x0001)
+superclass_idx      : 7
+interfaces_off      : 0 (0x000000)
+source_file_idx     : 11
+annotations_off     : 0 (0x000000)
+class_data_off      : 1067 (0x00042b)
+static_fields_size  : 12
+instance_fields_size: 0
+direct_methods_size : 1
+virtual_methods_size: 0
+
+Class #0            -
+  Class descriptor  : 'LStaticFields;'
+  Access flags      : 0x0001 (PUBLIC)
+  Superclass        : 'Ljava/lang/Object;'
+  Interfaces        -
+  Static fields     -
+    #0              : (in LStaticFields;)
+      name          : 'test00_public_static_final_byte_42'
+      type          : 'B'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 42
+    #1              : (in LStaticFields;)
+      name          : 'test01_public_static_final_short_43'
+      type          : 'S'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 43
+    #2              : (in LStaticFields;)
+      name          : 'test02_public_static_final_char_X'
+      type          : 'C'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 88
+    #3              : (in LStaticFields;)
+      name          : 'test03_public_static_final_int_44'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 44
+    #4              : (in LStaticFields;)
+      name          : 'test04_public_static_final_long_45'
+      type          : 'J'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 45
+    #5              : (in LStaticFields;)
+      name          : 'test05_public_static_final_float_46_47'
+      type          : 'F'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 46.470001
+    #6              : (in LStaticFields;)
+      name          : 'test06_public_static_final_double_48_49'
+      type          : 'D'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : 48.490000
+    #7              : (in LStaticFields;)
+      name          : 'test07_public_static_final_string'
+      type          : 'Ljava/lang/String;'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : "abc \\><\"'&\t\r\n"
+    #8              : (in LStaticFields;)
+      name          : 'test08_public_static_final_object_null'
+      type          : 'Ljava/lang/Object;'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : null
+    #9              : (in LStaticFields;)
+      name          : 'test09_public_static_final_boolean_true'
+      type          : 'Z'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+      value         : true
+    #10              : (in LStaticFields;)
+      name          : 'test10_private_static_final_int_50'
+      type          : 'I'
+      access        : 0x001a (PRIVATE STATIC FINAL)
+      value         : 50
+    #11              : (in LStaticFields;)
+      name          : 'test99_empty_value'
+      type          : 'I'
+      access        : 0x0019 (PUBLIC STATIC FINAL)
+  Instance fields   -
+  Direct methods    -
+    #0              : (in LStaticFields;)
+      name          : '<init>'
+      type          : '()V'
+      access        : 0x10001 (PUBLIC CONSTRUCTOR)
+      code          -
+      registers     : 1
+      ins           : 1
+      outs          : 1
+      insns size    : 4 16-bit code units
+0001ac:                                        |[0001ac] StaticFields.<init>:()V
+0001bc: 7010 0100 0000                         |0000: invoke-direct {v0}, Ljava/lang/Object;.<init>:()V // method@0001
+0001c2: 0e00                                   |0003: return-void
+      catches       : (none)
+      positions     : 
+        0x0000 line=24
+      locals        : 
+        0x0000 - 0x0004 reg=0 this LStaticFields; 
+
+  Virtual methods   -
+  source_file_idx   : 11 (StaticFields.java)
+
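
For reference, a plausible reconstruction of the StaticFields source behind this dump; the field
names, types, and values are taken from the output above, but the original Java file is not part
of this change, so treat the exact source as an assumption:

    public class StaticFields {
      public static final byte test00_public_static_final_byte_42 = 42;
      public static final short test01_public_static_final_short_43 = 43;
      public static final char test02_public_static_final_char_X = 'X';  // 88
      public static final int test03_public_static_final_int_44 = 44;
      public static final long test04_public_static_final_long_45 = 45;
      public static final float test05_public_static_final_float_46_47 = 46.47f;
      public static final double test06_public_static_final_double_48_49 = 48.49;
      public static final String test07_public_static_final_string = "abc \\><\"'&\t\r\n";
      public static final Object test08_public_static_final_object_null = null;
      public static final boolean test09_public_static_final_boolean_true = true;
      private static final int test10_private_static_final_int_50 = 50;
      // test99_empty_value is omitted: it has no encoded constant in the dex
      // (no "value" line above), which plain Java source cannot express for a
      // static final field without also emitting a <clinit>.
    }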
diff --git a/test/dexdump/staticfields.xml b/test/dexdump/staticfields.xml
new file mode 100644
index 0000000..c906f0a
--- /dev/null
+++ b/test/dexdump/staticfields.xml
@@ -0,0 +1,130 @@
+<api>
+<package name=""
+>
+<class name="StaticFields"
+ extends="java.lang.Object"
+ interface="false"
+ abstract="false"
+ static="false"
+ final="false"
+ visibility="public"
+>
+<field name="test00_public_static_final_byte_42"
+ type="byte"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="42"
+>
+</field>
+<field name="test01_public_static_final_short_43"
+ type="short"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="43"
+>
+</field>
+<field name="test02_public_static_final_char_X"
+ type="char"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="88"
+>
+</field>
+<field name="test03_public_static_final_int_44"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="44"
+>
+</field>
+<field name="test04_public_static_final_long_45"
+ type="long"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="45"
+>
+</field>
+<field name="test05_public_static_final_float_46_47"
+ type="float"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="46.470001"
+>
+</field>
+<field name="test06_public_static_final_double_48_49"
+ type="double"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="48.490000"
+>
+</field>
+<field name="test07_public_static_final_string"
+ type="java.lang.String"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="abc \>&lt;&quot;'&amp;&#x9;&#xD;&#xA;"
+>
+</field>
+<field name="test08_public_static_final_object_null"
+ type="java.lang.Object"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="null"
+>
+</field>
+<field name="test09_public_static_final_boolean_true"
+ type="boolean"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+ value="true"
+>
+</field>
+<field name="test99_empty_value"
+ type="int"
+ transient="false"
+ volatile="false"
+ static="true"
+ final="true"
+ visibility="public"
+>
+</field>
+<constructor name="StaticFields"
+ type="StaticFields"
+ static="false"
+ final="false"
+ visibility="public"
+>
+</constructor>
+</class>
+</package>
+</api>
diff --git a/test/run-test b/test/run-test
index d0da34e..ac2b52c 100755
--- a/test/run-test
+++ b/test/run-test
@@ -41,7 +41,7 @@
 fi
 checker="${progdir}/../tools/checker/checker.py"
 export JAVA="java"
-export JAVAC="javac -g"
+export JAVAC="javac -g -source 1.7 -target 1.7 -Xlint:-options"
 export RUN="${progdir}/etc/run-test-jar"
 export DEX_LOCATION=/data/run-test/${test_dir}
 export NEED_DEX="true"
@@ -85,7 +85,7 @@
 
 # If JACK_CLASSPATH is not set, assume it only contains core-libart.
 if [ -z "$JACK_CLASSPATH" ]; then
-  export JACK_CLASSPATH="${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/common/obj/JAVA_LIBRARIES/core-libart-hostdex_intermediates/classes.jack"
+  export JACK_CLASSPATH="${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/common/obj/JAVA_LIBRARIES/core-libart-hostdex_intermediates/classes.jack:${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/common/obj/JAVA_LIBRARIES/core-oj-hostdex_intermediates/classes.jack"
 fi
 
 # If JILL_JAR is not set, assume it is located in the prebuilts directory.
@@ -458,7 +458,7 @@
 if [ "$runtime" = "dalvik" ]; then
     if [ "$target_mode" = "no" ]; then
         framework="${ANDROID_PRODUCT_OUT}/system/framework"
-        bpath="${framework}/core.jar:${framework}/conscrypt.jar:${framework}/okhttp.jar:${framework}/core-junit.jar:${framework}/bouncycastle.jar:${framework}/ext.jar"
+        bpath="${framework}/core-libart.jar:${framework}/core-oj.jar:${framework}/conscrypt.jar:${framework}/okhttp.jar:${framework}/core-junit.jar:${framework}/bouncycastle.jar:${framework}/ext.jar"
         run_args="${run_args} --boot -Xbootclasspath:${bpath}"
     else
         true # defaults to using target BOOTCLASSPATH
@@ -502,6 +502,7 @@
     # TODO If the target was compiled WITH_DEXPREOPT=true then these tests will
     # fail since these jar files will be stripped.
     bpath="${framework}/core-libart${bpath_suffix}.jar"
+    bpath="${bpath}:${framework}/core-oj${bpath_suffix}.jar"
     bpath="${bpath}:${framework}/conscrypt${bpath_suffix}.jar"
     bpath="${bpath}:${framework}/okhttp${bpath_suffix}.jar"
     bpath="${bpath}:${framework}/core-junit${bpath_suffix}.jar"
@@ -669,9 +670,9 @@
 # -------------------------------
 # Return whether the Optimizing compiler has read barrier support for ARCH.
 function arch_supports_read_barrier() {
-  # Optimizing has read barrier support for ARM, x86 and x86-64 at the
+  # Optimizing has read barrier support for ARM, ARM64, x86 and x86-64 at the
   # moment.
-  [ "x$1" = xarm ] || [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ]
+  [ "x$1" = xarm ] || [ "x$1" = xarm64 ] || [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ]
 }
 
 # Tests named '<number>-checker-*' will also have their CFGs verified with
@@ -739,8 +740,8 @@
 if [ "$run_checker" = "yes" -a "$target_mode" = "yes" ]; then
   # We will need to `adb pull` the .cfg output from the target onto the host to
   # run checker on it. This file can be big.
-  build_file_size_limit=16384
-  run_file_size_limit=16384
+  build_file_size_limit=24576
+  run_file_size_limit=24576
 fi
 if [ ${USE_JACK} = "false" ]; then
   # Set ulimit if we build with dx only, Jack can generate big temp files.
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index 362ae25..adc4d03 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -19,7 +19,6 @@
  * Show site context and heap and class filter in "Objects" view?
  * Have a menu at the top of an object view with links to the sections?
  * Include ahat version and hprof file in the menu at the top of the page?
- * Show root types.
  * Heaped Table
    - Make sortable by clicking on headers.
  * For HeapTable with single heap shown, the heap name isn't centered?
@@ -77,6 +76,7 @@
  * Extracting bitmap data from bitmap instances.
  * Adding up allocations by stack frame.
  * Computing, for each instance, the other instances it dominates.
+ * Instance.isRoot and Instance.getRootTypes.
 
 Release History:
  0.2 Oct 20, 2015
diff --git a/tools/ahat/src/AhatSnapshot.java b/tools/ahat/src/AhatSnapshot.java
index 0bf064e..fc7911b 100644
--- a/tools/ahat/src/AhatSnapshot.java
+++ b/tools/ahat/src/AhatSnapshot.java
@@ -19,6 +19,8 @@
 import com.android.tools.perflib.heap.ClassObj;
 import com.android.tools.perflib.heap.Heap;
 import com.android.tools.perflib.heap.Instance;
+import com.android.tools.perflib.heap.RootObj;
+import com.android.tools.perflib.heap.RootType;
 import com.android.tools.perflib.heap.Snapshot;
 import com.android.tools.perflib.heap.StackFrame;
 import com.android.tools.perflib.heap.StackTrace;
@@ -29,8 +31,10 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 
@@ -48,6 +52,11 @@
   // Collection of objects whose immediate dominator is the SENTINEL_ROOT.
   private List<Instance> mRooted;
 
+  // Map from roots to their types.
+  // Instances are only included if they are roots, and the collection of root
+  // types is guaranteed to be non-empty.
+  private Map<Instance, Collection<RootType>> mRoots;
+
   private Site mRootSite;
   private Map<Heap, Long> mHeapSizes;
 
@@ -113,6 +122,18 @@
       }
       mHeapSizes.put(heap, total);
     }
+
+    // Record the roots and their types.
+    mRoots = new HashMap<Instance, Collection<RootType>>();
+    for (RootObj root : snapshot.getGCRoots()) {
+      Instance inst = root.getReferredInstance();
+      Collection<RootType> types = mRoots.get(inst);
+      if (types == null) {
+        types = new HashSet<RootType>();
+        mRoots.put(inst, types);
+      }
+      types.add(root.getRootType());
+    }
   }
 
   // Note: This method is exposed for testing purposes.
@@ -140,6 +161,21 @@
     return mRooted;
   }
 
+  /**
+   * Returns true if the given instance is a root.
+   */
+  public boolean isRoot(Instance inst) {
+    return mRoots.containsKey(inst);
+  }
+
+  /**
+   * Returns the list of root types for the given instance, or null if the
+   * instance is not a root.
+   */
+  public Collection<RootType> getRootTypes(Instance inst) {
+    return mRoots.get(inst);
+  }
+
   public List<Heap> getHeaps() {
     return mHeaps;
   }
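
A short usage sketch of the two new accessors (describeRootTypes is a hypothetical helper, not
part of this change; it mirrors the loop ObjectHandler uses below):

    // Produce the "Root Types" text for an instance, or null for non-roots.
    static String describeRootTypes(AhatSnapshot snapshot, Instance inst) {
      if (!snapshot.isRoot(inst)) {
        return null;
      }
      // getRootTypes is documented above to be non-empty for any root.
      StringBuilder sb = new StringBuilder();
      String comma = "";
      for (RootType type : snapshot.getRootTypes(inst)) {
        sb.append(comma).append(type.getName());
        comma = ", ";
      }
      return sb.toString();
    }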
diff --git a/tools/ahat/src/DominatedList.java b/tools/ahat/src/DominatedList.java
index 34a5665..7a673f5 100644
--- a/tools/ahat/src/DominatedList.java
+++ b/tools/ahat/src/DominatedList.java
@@ -71,7 +71,7 @@
         }
 
         public DocString render(Instance element) {
-          return Value.render(element);
+          return Value.render(mSnapshot, element);
         }
       };
       return Collections.singletonList(value);
diff --git a/tools/ahat/src/ObjectHandler.java b/tools/ahat/src/ObjectHandler.java
index 1305070..06023da 100644
--- a/tools/ahat/src/ObjectHandler.java
+++ b/tools/ahat/src/ObjectHandler.java
@@ -23,9 +23,11 @@
 import com.android.tools.perflib.heap.Heap;
 import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.RootObj;
+import com.android.tools.perflib.heap.RootType;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
@@ -57,7 +59,7 @@
     }
 
     doc.title("Object %08x", inst.getUniqueId());
-    doc.big(Value.render(inst));
+    doc.big(Value.render(mSnapshot, inst));
 
     printAllocationSite(doc, query, inst);
     printDominatorPath(doc, query, inst);
@@ -65,27 +67,41 @@
     doc.section("Object Info");
     ClassObj cls = inst.getClassObj();
     doc.descriptions();
-    doc.description(DocString.text("Class"), Value.render(cls));
+    doc.description(DocString.text("Class"), Value.render(mSnapshot, cls));
     doc.description(DocString.text("Size"), DocString.format("%d", inst.getSize()));
     doc.description(
         DocString.text("Retained Size"),
         DocString.format("%d", inst.getTotalRetainedSize()));
     doc.description(DocString.text("Heap"), DocString.text(inst.getHeap().getName()));
+
+    Collection<RootType> rootTypes = mSnapshot.getRootTypes(inst);
+    if (rootTypes != null) {
+      DocString types = new DocString();
+      String comma = "";
+      for (RootType type : rootTypes) {
+        types.append(comma);
+        types.append(type.getName());
+        comma = ", ";
+      }
+      doc.description(DocString.text("Root Types"), types);
+    }
+
     doc.end();
 
     printBitmap(doc, inst);
     if (inst instanceof ClassInstance) {
-      printClassInstanceFields(doc, query, (ClassInstance)inst);
+      printClassInstanceFields(doc, query, mSnapshot, (ClassInstance)inst);
     } else if (inst instanceof ArrayInstance) {
-      printArrayElements(doc, query, (ArrayInstance)inst);
+      printArrayElements(doc, query, mSnapshot, (ArrayInstance)inst);
     } else if (inst instanceof ClassObj) {
-      printClassInfo(doc, query, (ClassObj)inst);
+      printClassInfo(doc, query, mSnapshot, (ClassObj)inst);
     }
-    printReferences(doc, query, inst);
+    printReferences(doc, query, mSnapshot, inst);
     printDominatedObjects(doc, query, inst);
   }
 
-  private static void printClassInstanceFields(Doc doc, Query query, ClassInstance inst) {
+  private static void printClassInstanceFields(
+      Doc doc, Query query, AhatSnapshot snapshot, ClassInstance inst) {
     doc.section("Fields");
     doc.table(new Column("Type"), new Column("Name"), new Column("Value"));
     SubsetSelector<ClassInstance.FieldValue> selector
@@ -94,31 +110,35 @@
       doc.row(
           DocString.text(field.getField().getType().toString()),
           DocString.text(field.getField().getName()),
-          Value.render(field.getValue()));
+          Value.render(snapshot, field.getValue()));
     }
     doc.end();
     selector.render(doc);
   }
 
-  private static void printArrayElements(Doc doc, Query query, ArrayInstance array) {
+  private static void printArrayElements(
+      Doc doc, Query query, AhatSnapshot snapshot, ArrayInstance array) {
     doc.section("Array Elements");
     doc.table(new Column("Index", Column.Align.RIGHT), new Column("Value"));
     List<Object> elements = Arrays.asList(array.getValues());
     SubsetSelector<Object> selector = new SubsetSelector(query, ARRAY_ELEMENTS_ID, elements);
     int i = 0;
     for (Object elem : selector.selected()) {
-      doc.row(DocString.format("%d", i), Value.render(elem));
+      doc.row(DocString.format("%d", i), Value.render(snapshot, elem));
       i++;
     }
     doc.end();
     selector.render(doc);
   }
 
-  private static void printClassInfo(Doc doc, Query query, ClassObj clsobj) {
+  private static void printClassInfo(
+      Doc doc, Query query, AhatSnapshot snapshot, ClassObj clsobj) {
     doc.section("Class Info");
     doc.descriptions();
-    doc.description(DocString.text("Super Class"), Value.render(clsobj.getSuperClassObj()));
-    doc.description(DocString.text("Class Loader"), Value.render(clsobj.getClassLoader()));
+    doc.description(DocString.text("Super Class"),
+        Value.render(snapshot, clsobj.getSuperClassObj()));
+    doc.description(DocString.text("Class Loader"),
+        Value.render(snapshot, clsobj.getClassLoader()));
     doc.end();
 
     doc.section("Static Fields");
@@ -131,13 +151,14 @@
       doc.row(
           DocString.text(field.getKey().getType().toString()),
           DocString.text(field.getKey().getName()),
-          Value.render(field.getValue()));
+          Value.render(snapshot, field.getValue()));
     }
     doc.end();
     selector.render(doc);
   }
 
-  private static void printReferences(Doc doc, Query query, Instance inst) {
+  private static void printReferences(
+      Doc doc, Query query, AhatSnapshot snapshot, Instance inst) {
     doc.section("Objects with References to this Object");
     if (inst.getHardReferences().isEmpty()) {
       doc.println(DocString.text("(none)"));
@@ -146,7 +167,7 @@
       List<Instance> references = inst.getHardReferences();
       SubsetSelector<Instance> selector = new SubsetSelector(query, HARD_REFS_ID, references);
       for (Instance ref : selector.selected()) {
-        doc.row(Value.render(ref));
+        doc.row(Value.render(snapshot, ref));
       }
       doc.end();
       selector.render(doc);
@@ -158,7 +179,7 @@
       List<Instance> references = inst.getSoftReferences();
       SubsetSelector<Instance> selector = new SubsetSelector(query, SOFT_REFS_ID, references);
       for (Instance ref : selector.selected()) {
-        doc.row(Value.render(ref));
+        doc.row(Value.render(snapshot, ref));
       }
       doc.end();
       selector.render(doc);
@@ -217,7 +238,7 @@
             if (element == null) {
               return DocString.link(DocString.uri("rooted"), DocString.text("ROOT"));
             } else {
-              return DocString.text("→ ").append(Value.render(element));
+              return DocString.text("→ ").append(Value.render(mSnapshot, element));
             }
           }
         };
diff --git a/tools/ahat/src/ObjectsHandler.java b/tools/ahat/src/ObjectsHandler.java
index 8ad3f48..4cfb0a5 100644
--- a/tools/ahat/src/ObjectsHandler.java
+++ b/tools/ahat/src/ObjectsHandler.java
@@ -60,7 +60,7 @@
       doc.row(
           DocString.format("%,d", inst.getSize()),
           DocString.text(inst.getHeap().getName()),
-          Value.render(inst));
+          Value.render(mSnapshot, inst));
     }
     doc.end();
     selector.render(doc);
diff --git a/tools/ahat/src/SiteHandler.java b/tools/ahat/src/SiteHandler.java
index 0425a5a..839e220 100644
--- a/tools/ahat/src/SiteHandler.java
+++ b/tools/ahat/src/SiteHandler.java
@@ -101,7 +101,7 @@
                 site.getStackId(), site.getStackDepth(), info.heap.getName(), className),
             DocString.format("%,14d", info.numInstances)),
           DocString.text(info.heap.getName()),
-          Value.render(info.classObj));
+          Value.render(mSnapshot, info.classObj));
     }
     doc.end();
     selector.render(doc);
diff --git a/tools/ahat/src/Value.java b/tools/ahat/src/Value.java
index 7c969b3..847692b 100644
--- a/tools/ahat/src/Value.java
+++ b/tools/ahat/src/Value.java
@@ -32,21 +32,29 @@
   /**
    * Create a DocString representing a summary of the given instance.
    */
-  private static DocString renderInstance(Instance inst) {
-    DocString link = new DocString();
+  private static DocString renderInstance(AhatSnapshot snapshot, Instance inst) {
+    DocString formatted = new DocString();
     if (inst == null) {
-      link.append("(null)");
-      return link;
+      formatted.append("(null)");
+      return formatted;
     }
 
+    // Annotate roots as roots.
+    if (snapshot.isRoot(inst)) {
+      formatted.append("(root) ");
+    }
+
+
     // Annotate classes as classes.
+    DocString link = new DocString();
     if (inst instanceof ClassObj) {
       link.append("class ");
     }
 
     link.append(inst.toString());
+
     URI objTarget = DocString.formattedUri("object?id=%d", inst.getId());
-    DocString formatted = DocString.link(objTarget, link);
+    formatted.appendLink(objTarget, link);
 
     // Annotate Strings with their values.
     String stringValue = InstanceUtils.asString(inst, kMaxChars);
@@ -63,7 +71,7 @@
       // It should not be possible for a referent to refer back to the
       // reference object, even indirectly, so there shouldn't be any issues
       // with infinite recursion here.
-      formatted.append(renderInstance(referent));
+      formatted.append(renderInstance(snapshot, referent));
     }
 
     // Annotate DexCache with its location.
@@ -89,9 +97,9 @@
   /**
    * Create a DocString summarizing the given value.
    */
-  public static DocString render(Object val) {
+  public static DocString render(AhatSnapshot snapshot, Object val) {
     if (val instanceof Instance) {
-      return renderInstance((Instance)val);
+      return renderInstance(snapshot, (Instance)val);
     } else {
       return DocString.format("%s", val);
     }
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index a5476f7..b323456 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -170,5 +170,11 @@
   result: EXEC_FAILED,
   names: ["org.apache.harmony.tests.java.util.WeakHashMapTest#test_keySet"],
   bug: 25437292
+},
+{
+  description: "Assertion failing on the concurrent collector configuration.",
+  result: EXEC_FAILED,
+  names: ["jsr166.LinkedTransferQueueTest#testTransfer2"],
+  bug: 25883050
 }
 ]
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index de27a6f..c79f4b9 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -28,6 +28,18 @@
   exit 1
 fi
 
+if [ "x$ART_USE_READ_BARRIER" = xtrue ]; then
+  # For the moment, skip JDWP tests when read barriers are enabled, as
+  # they sometimes exhibit a deadlock issue with the concurrent
+  # copying collector in the read barrier configuration, between the
+  # HeapTaskDaemon and the JDWP thread (b/25800335).
+  #
+  # TODO: Re-enable the JDWP tests when this deadlock issue is fixed.
+  echo "JDWP tests are temporarily disabled in the read barrier configuration because of"
+  echo "a deadlock issue (b/25800335)."
+  exit 0
+fi
+
 art="/data/local/tmp/system/bin/art"
 art_debugee="sh /data/local/tmp/system/bin/art"
 args=$@
@@ -43,9 +55,11 @@
 vm_args=""
 # By default, we run the whole JDWP test suite.
 test="org.apache.harmony.jpda.tests.share.AllTests"
+host="no"
 
 while true; do
   if [[ "$1" == "--mode=host" ]]; then
+    host="yes"
     # Specify bash explicitly since the art script cannot, since it has to run on the device
     # with mksh.
     art="bash ${OUT_DIR-out}/host/linux-x86/bin/art"
@@ -114,7 +128,19 @@
       --vm-arg -Djpda.settings.verbose=true \
       --vm-arg -Djpda.settings.syncPort=34016 \
       --vm-arg -Djpda.settings.transportAddress=127.0.0.1:55107 \
-      --vm-arg -Djpda.settings.debuggeeJavaPath="\"$art_debugee $image $debuggee_args\"" \
+      --vm-arg -Djpda.settings.debuggeeJavaPath="$art_debugee $image $debuggee_args" \
       --classpath $test_jar \
       --vm-arg -Xcompiler-option --vm-arg --debuggable \
       $test
+
+vogar_exit_status=$?
+
+echo "Killing stalled dalvikvm processes..."
+if [[ $host == "yes" ]]; then
+  pkill -9 -f /bin/dalvikvm
+else
+  adb shell pkill -9 -f /bin/dalvikvm
+fi
+echo "Done."
+
+exit $vogar_exit_status