Merge "Revert "ART: Better SSA Allocation when recreating SSA""
diff --git a/Android.mk b/Android.mk
index 92339af..312507f 100644
--- a/Android.mk
+++ b/Android.mk
@@ -40,20 +40,18 @@
 
 .PHONY: clean-oat-host
 clean-oat-host:
-	rm -f $(ART_NATIVETEST_OUT)/*.odex
-	rm -f $(ART_NATIVETEST_OUT)/*.oat
-	rm -f $(ART_NATIVETEST_OUT)/*.art
-	rm -f $(ART_TEST_OUT)/*.odex
-	rm -f $(ART_TEST_OUT)/*.oat
-	rm -f $(ART_TEST_OUT)/*.art
-	rm -f $(HOST_OUT_JAVA_LIBRARIES)/*.odex
-	rm -f $(HOST_OUT_JAVA_LIBRARIES)/*.oat
-	rm -f $(HOST_OUT_JAVA_LIBRARIES)/*.art
-	rm -f $(TARGET_OUT_JAVA_LIBRARIES)/*.odex
-	rm -f $(TARGET_OUT_JAVA_LIBRARIES)/*.oat
-	rm -f $(TARGET_OUT_JAVA_LIBRARIES)/*.art
-	rm -f $(DEXPREOPT_PRODUCT_DIR_FULL_PATH)/$(DEXPREOPT_BOOT_JAR_DIR)/*.oat
-	rm -f $(DEXPREOPT_PRODUCT_DIR_FULL_PATH)/$(DEXPREOPT_BOOT_JAR_DIR)/*.art
+	rm -rf $(ART_NATIVETEST_OUT)
+	rm -rf $(ART_TEST_OUT)
+	rm -f $(HOST_CORE_IMG_OUT)
+	rm -f $(HOST_CORE_OAT_OUT)
+	rm -f $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/*.odex
+	rm -f $(TARGET_CORE_IMG_OUT)
+	rm -f $(TARGET_CORE_OAT_OUT)
+ifdef TARGET_2ND_ARCH
+	rm -f $(2ND_TARGET_CORE_IMG_OUT)
+	rm -f $(2ND_TARGET_CORE_OAT_OUT)
+endif
+	rm -rf $(DEXPREOPT_PRODUCT_DIR_FULL_PATH)
 	rm -f $(TARGET_OUT_UNSTRIPPED)/system/framework/*.odex
 	rm -f $(TARGET_OUT_UNSTRIPPED)/system/framework/*.oat
 	rm -f $(TARGET_OUT_APPS)/*.odex
@@ -63,30 +61,21 @@
 	rm -f $(2ND_TARGET_OUT_INTERMEDIATES)/JAVA_LIBRARIES/*_intermediates/javalib.odex
 	rm -f $(2ND_TARGET_OUT_INTERMEDIATES)/APPS/*_intermediates/*.odex
 endif
-	rm -rf /tmp/test-*/dalvik-cache/*
+	rm -rf /tmp/$(USER)/test-*/dalvik-cache/*
 	rm -rf /tmp/android-data/dalvik-cache/*
 
 .PHONY: clean-oat-target
 clean-oat-target:
 	adb remount
-	adb shell rm -f $(ART_NATIVETEST_DIR)/*.odex
-	adb shell rm -f $(ART_NATIVETEST_DIR)/*.oat
-	adb shell rm -f $(ART_NATIVETEST_DIR)/*.art
-	adb shell rm -f $(ART_TEST_DIR)/*.odex
-	adb shell rm -f $(ART_TEST_DIR)/*.oat
-	adb shell rm -f $(ART_TEST_DIR)/*.art
-ifdef TARGET_2ND_ARCH
-	adb shell rm -f $(2ND_ART_NATIVETEST_DIR)/*.odex
-	adb shell rm -f $(2ND_ART_NATIVETEST_DIR)/*.oat
-	adb shell rm -f $(2ND_ART_NATIVETEST_DIR)/*.art
-	adb shell rm -f $(2ND_ART_TEST_DIR)/*.odex
-	adb shell rm -f $(2ND_ART_TEST_DIR)/*.oat
-	adb shell rm -f $(2ND_ART_TEST_DIR)/*.art
-endif
+	adb shell rm -rf $(ART_NATIVETEST_DIR)
+	adb shell rm -rf $(ART_TEST_DIR)
 	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
-	adb shell rm -f $(DEXPREOPT_BOOT_JAR_DIR)/*.oat
-	adb shell rm -f $(DEXPREOPT_BOOT_JAR_DIR)/*.art
-	adb shell rm -f system/app/*.odex
+	adb shell rm -rf $(DEXPREOPT_BOOT_JAR_DIR)/$(DEX2OAT_TARGET_ARCH)
+	adb shell rm -rf system/app/$(DEX2OAT_TARGET_ARCH)
+ifdef TARGET_2ND_ARCH
+	adb shell rm -rf $(DEXPREOPT_BOOT_JAR_DIR)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)
+	adb shell rm -rf system/app/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)
+endif
 	adb shell rm -rf data/run-test/test-*/dalvik-cache/*
 
 ifneq ($(art_dont_bother),true)
@@ -108,6 +97,9 @@
 ART_HOST_DEPENDENCIES := $(ART_HOST_EXECUTABLES) $(HOST_OUT_JAVA_LIBRARIES)/core-libart-hostdex.jar
 ART_HOST_DEPENDENCIES += $(HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
 ART_TARGET_DEPENDENCIES := $(ART_TARGET_EXECUTABLES) $(TARGET_OUT_JAVA_LIBRARIES)/core-libart.jar $(TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so
+ifdef TARGET_2ND_ARCH
+ART_TARGET_DEPENDENCIES += $(2ND_TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so
+endif
 
 ########################################################################
 # test targets
@@ -150,9 +142,21 @@
 ########################################################################
 # host test targets
 
+.PHONY: test-art-host-vixl
+VIXL_TEST_DEPENDENCY :=
+# We can only run the vixl tests on 64-bit hosts (vixl testing issue) when it's a
+# top-level build (to declare the vixl test rule).
+ifneq ($(HOST_IS_64_BIT),)
+ifeq ($(ONE_SHOT_MAKEFILE),)
+VIXL_TEST_DEPENDENCY := run-vixl-tests
+endif
+endif
+
+test-art-host-vixl: $(VIXL_TEST_DEPENDENCY)
+
 # "mm test-art-host" to build and run all host tests
 .PHONY: test-art-host
-test-art-host: test-art-host-gtest test-art-host-oat test-art-host-run-test
+test-art-host: test-art-host-gtest test-art-host-oat test-art-host-run-test test-art-host-vixl
 	@echo test-art-host PASSED
 
 .PHONY: test-art-host-interpreter
@@ -186,14 +190,14 @@
 define declare-test-art-host-run-test
 .PHONY: test-art-host-run-test-default-$(1)
 test-art-host-run-test-default-$(1): test-art-host-dependencies $(DX) $(HOST_OUT_EXECUTABLES)/jasmin
-	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(DALVIKVM_FLAGS) --host $(1)
+	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(addprefix --runtime-option ,$(DALVIKVM_FLAGS)) --host $(1)
 	@echo test-art-host-run-test-default-$(1) PASSED
 
 TEST_ART_HOST_RUN_TEST_DEFAULT_TARGETS += test-art-host-run-test-default-$(1)
 
 .PHONY: test-art-host-run-test-interpreter-$(1)
 test-art-host-run-test-interpreter-$(1): test-art-host-dependencies $(DX) $(HOST_OUT_EXECUTABLES)/jasmin
-	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(DALVIKVM_FLAGS) --host --interpreter $(1)
+	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(addprefix --runtime-option ,$(DALVIKVM_FLAGS)) --host --interpreter $(1)
 	@echo test-art-host-run-test-interpreter-$(1) PASSED
 
 TEST_ART_HOST_RUN_TEST_INTERPRETER_TARGETS += test-art-host-run-test-interpreter-$(1)
@@ -228,10 +232,9 @@
 endef
 $(eval $(call call-art-multi-target-rule,declare-test-art-target,test-art-target))
 
-
 define declare-test-art-target-dependencies
 .PHONY: test-art-target-dependencies$(1)
-test-art-target-dependencies$(1): $(ART_TARGET_TEST_DEPENDENCIES$(1)) $(ART_TEST_OUT)/libarttest.so
+test-art-target-dependencies$(1): $(ART_TARGET_TEST_DEPENDENCIES$(1)) $(ART_TARGET_LIBARTTEST_$(1))
 endef
 $(eval $(call call-art-multi-target-rule,declare-test-art-target-dependencies,test-art-target-dependencies))
 
@@ -266,7 +269,7 @@
 endif
 .PHONY: test-art-target-run-test-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX)
 test-art-target-run-test-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX): test-art-target-sync $(DX) $(HOST_OUT_EXECUTABLES)/jasmin
-	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(DALVIKVM_FLAGS) $$($(2)run_test_$(1)) $(1)
+	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(addprefix --runtime-option ,$(DALVIKVM_FLAGS)) $$($(2)run_test_$(1)) $(1)
 	@echo test-art-target-run-test-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX) PASSED
 endef
 
@@ -367,7 +370,7 @@
 ########################################################################
 # "m art-host" for just building the files needed to run the art script
 .PHONY: art-host
-art-host:   $(HOST_OUT_EXECUTABLES)/art $(HOST_OUT)/bin/dalvikvm $(HOST_OUT)/lib/libart.so $(HOST_OUT)/bin/dex2oat $(HOST_OUT_JAVA_LIBRARIES)/core.art $(HOST_OUT)/lib/libjavacore.so
+art-host:   $(HOST_OUT_EXECUTABLES)/art $(HOST_OUT)/bin/dalvikvm $(HOST_OUT)/lib/libart.so $(HOST_OUT)/bin/dex2oat $(HOST_CORE_IMG_OUT) $(HOST_OUT)/lib/libjavacore.so
 
 .PHONY: art-host-debug
 art-host-debug:   art-host $(HOST_OUT)/lib/libartd.so $(HOST_OUT)/bin/dex2oatd
@@ -391,21 +394,21 @@
 .PHONY: dump-oat-core-host
 ifeq ($(ART_BUILD_HOST),true)
 dump-oat-core-host: $(HOST_CORE_IMG_OUT) $(OATDUMP)
-	$(OATDUMP) --image=$(HOST_CORE_IMG_OUT) --output=$(ART_DUMP_OAT_PATH)/core.host.oatdump.txt
+	$(OATDUMP) --image=$(HOST_CORE_IMG_LOCATION) --output=$(ART_DUMP_OAT_PATH)/core.host.oatdump.txt
 	@echo Output in $(ART_DUMP_OAT_PATH)/core.host.oatdump.txt
 endif
 
 .PHONY: dump-oat-core-target
 ifeq ($(ART_BUILD_TARGET),true)
 dump-oat-core-target: $(TARGET_CORE_IMG_OUT) $(OATDUMP)
-	$(OATDUMP) --image=$(TARGET_CORE_IMG_OUT) --output=$(ART_DUMP_OAT_PATH)/core.target.oatdump.txt
+	$(OATDUMP) --image=$(TARGET_CORE_IMG_LOCATION) --output=$(ART_DUMP_OAT_PATH)/core.target.oatdump.txt --instruction-set=$(TARGET_ARCH)
 	@echo Output in $(ART_DUMP_OAT_PATH)/core.target.oatdump.txt
 endif
 
 .PHONY: dump-oat-boot
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
-dump-oat-boot: $(DEFAULT_DEX_PREOPT_BUILT_IMAGE) $(OATDUMP)
-	$(OATDUMP) --image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE) --output=$(ART_DUMP_OAT_PATH)/boot.oatdump.txt
+dump-oat-boot: $(DEFAULT_DEX_PREOPT_BUILT_IMAGE_FILENAME) $(OATDUMP)
+	$(OATDUMP) --image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE_LOCATION) --output=$(ART_DUMP_OAT_PATH)/boot.oatdump.txt --instruction-set=$(TARGET_ARCH)
 	@echo Output in $(ART_DUMP_OAT_PATH)/boot.oatdump.txt
 endif
 
@@ -449,21 +452,27 @@
 use-art-full:
 	adb root && sleep 3
 	adb shell stop
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.dex
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.oat
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.art
+	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-flags ""
 	adb shell setprop dalvik.vm.image-dex2oat-flags ""
 	adb shell setprop persist.sys.dalvik.vm.lib.1 libart.so
 	adb shell start
 
+.PHONY: use-artd-full
+use-artd-full:
+	adb root && sleep 3
+	adb shell stop
+	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
+	adb shell setprop dalvik.vm.dex2oat-flags ""
+	adb shell setprop dalvik.vm.image-dex2oat-flags ""
+	adb shell setprop persist.sys.dalvik.vm.lib.1 libartd.so
+	adb shell start
+
 .PHONY: use-art-smart
 use-art-smart:
 	adb root && sleep 3
 	adb shell stop
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.dex
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.oat
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.art
+	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-flags "--compiler-filter=interpret-only"
 	adb shell setprop dalvik.vm.image-dex2oat-flags ""
 	adb shell setprop persist.sys.dalvik.vm.lib.1 libart.so
@@ -473,9 +482,7 @@
 use-art-interpret-only:
 	adb root && sleep 3
 	adb shell stop
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.dex
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.oat
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.art
+	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-flags "--compiler-filter=interpret-only"
 	adb shell setprop dalvik.vm.image-dex2oat-flags "--compiler-filter=interpret-only"
 	adb shell setprop persist.sys.dalvik.vm.lib.1 libart.so
@@ -485,9 +492,7 @@
 use-art-verify-none:
 	adb root && sleep 3
 	adb shell stop
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.dex
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.oat
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.art
+	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-flags "--compiler-filter=verify-none"
 	adb shell setprop dalvik.vm.image-dex2oat-flags "--compiler-filter=verify-none"
 	adb shell setprop persist.sys.dalvik.vm.lib.1 libart.so
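
Note on the run-test changes above: every dalvikvm flag is now forwarded through --runtime-option. A minimal sketch of the $(addprefix ...) expansion, assuming DALVIKVM_FLAGS holds the Optimizing-backend options defined in build/Android.common.mk:

    DALVIKVM_FLAGS := -Xcompiler-option --compiler-backend=Optimizing
    # $(addprefix --runtime-option ,$(DALVIKVM_FLAGS)) prepends the prefix to each word:
    #   --runtime-option -Xcompiler-option --runtime-option --compiler-backend=Optimizing
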
diff --git a/build/Android.common.mk b/build/Android.common.mk
index aaa1490..83c536f 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -37,11 +37,7 @@
 ART_BUILD_HOST_NDEBUG ?= $(WITH_HOST_DALVIK)
 ART_BUILD_HOST_DEBUG ?= $(WITH_HOST_DALVIK)
 
-ifeq ($(BUILD_HOST_64bit),)
-ART_HOST_ARCH := x86
-else
-ART_HOST_ARCH := x86_64
-endif
+ART_HOST_ARCH := $(HOST_ARCH)
 
 ifeq ($(ART_BUILD_TARGET_NDEBUG),false)
 $(info Disabling ART_BUILD_TARGET_NDEBUG)
@@ -107,12 +103,20 @@
 
 ifeq ($(ART_USE_OPTIMIZING_COMPILER),true)
 DEX2OAT_FLAGS := --compiler-backend=Optimizing
-DALVIKVM_FLAGS := -Xcompiler-option --compiler-backend=Optimizing
+DALVIKVM_FLAGS += -Xcompiler-option --compiler-backend=Optimizing
 endif
 
-LLVM_ROOT_PATH := external/llvm
-# Don't fail a dalvik minimal host build.
--include $(LLVM_ROOT_PATH)/llvm.mk
+#
+# Used to change the default GC. Valid values are CMS, SS, GSS. The default is CMS.
+#
+ART_DEFAULT_GC_TYPE ?= CMS
+ART_DEFAULT_GC_TYPE_CFLAGS := -DART_DEFAULT_GC_TYPE_IS_$(ART_DEFAULT_GC_TYPE)
+
+ifeq ($(ART_USE_PORTABLE_COMPILER),true)
+  LLVM_ROOT_PATH := external/llvm
+  # Don't fail a dalvik minimal host build.
+  -include $(LLVM_ROOT_PATH)/llvm.mk
+endif
 
 # Clang build support.
 # Target builds use GCC by default.
@@ -123,23 +127,26 @@
   ART_HOST_CLANG := true
 endif
 
+# enable ART_TARGET_CLANG for ARM64
+ifneq (,$(filter $(TARGET_ARCH),arm64))
+ART_TARGET_CLANG := true
+endif
+
 # directory used for dalvik-cache on device
 ART_DALVIK_CACHE_DIR := /data/dalvik-cache
 
 # directory used for gtests on device
-ART_BASE_NATIVETEST_DIR := /data/nativetest/art
-ART_BASE_NATIVETEST_OUT := $(TARGET_OUT_DATA_NATIVE_TESTS)/art
+ART_NATIVETEST_DIR := /data/nativetest/art
+ART_NATIVETEST_OUT := $(TARGET_OUT_DATA_NATIVE_TESTS)/art
 
-# directory used for tests on device
-ART_BASE_TEST_DIR := /data/art-test
-ART_BASE_TEST_OUT := $(TARGET_OUT_DATA)/art-test
+# directory used for oat tests on device
+ART_TEST_DIR := /data/art-test
+ART_TEST_OUT := $(TARGET_OUT_DATA)/art-test
 
 # Primary vs. secondary
 2ND_TARGET_ARCH := $(TARGET_2ND_ARCH)
 ART_PHONY_TEST_TARGET_SUFFIX :=
 2ND_ART_PHONY_TEST_TARGET_SUFFIX :=
-ART_TARGET_BINARY_SUFFIX :=
-2ND_ART_TARGET_BINARY_SUFFIX :=
 ifdef TARGET_2ND_ARCH
   art_test_primary_suffix :=
   art_test_secondary_suffix :=
@@ -147,27 +154,15 @@
     art_test_primary_suffix := 64
     ART_PHONY_TEST_TARGET_SUFFIX := 64
     2ND_ART_PHONY_TEST_TARGET_SUFFIX := 32
-    ART_TARGET_BINARY_SUFFIX := 64
+    ART_TARGET_ARCH_32 := $(TARGET_2ND_ARCH)
+    ART_TARGET_ARCH_64 := $(TARGET_ARCH)
   else
     # TODO: ???
     $(error Do not know what to do with this multi-target configuration!)
   endif
-  # Primary with primary suffix
-  ART_NATIVETEST_DIR := $(ART_BASE_NATIVETEST_DIR)$(art_test_primary_suffix)
-  ART_NATIVETEST_OUT := $(ART_BASE_NATIVETEST_OUT)$(art_test_primary_suffix)
-  ART_TEST_DIR := $(ART_BASE_TEST_DIR)$(art_test_primary_suffix)
-  ART_TEST_OUT := $(ART_BASE_TEST_OUT)$(art_test_primary_suffix)
-  # Secondary with 2ND_ prefix and secondary suffix
-  2ND_ART_NATIVETEST_DIR := $(ART_BASE_NATIVETEST_DIR)$(art_test_secondary_suffix)
-  2ND_ART_NATIVETEST_OUT := $(ART_BASE_NATIVETEST_OUT)$(art_test_secondary_suffix)
-  2ND_ART_TEST_DIR := $(ART_BASE_TEST_DIR)$(art_test_secondary_suffix)
-  2ND_ART_TEST_OUT := $(ART_BASE_TEST_OUT)$(art_test_secondary_suffix)
 else
-  ART_NATIVETEST_DIR := $(ART_BASE_NATIVETEST_DIR)
-  ART_NATIVETEST_OUT := $(ART_BASE_NATIVETEST_OUT)
-  ART_TEST_DIR := $(ART_BASE_TEST_DIR)
-  ART_TEST_OUT := $(ART_BASE_TEST_OUT)
-  # No secondary
+  ART_TARGET_ARCH_32 := $(TARGET_ARCH)
+  ART_TARGET_ARCH_64 :=
 endif
 
 ART_CPP_EXTENSION := .cc
@@ -195,6 +190,14 @@
 	-Wstrict-aliasing \
 	-fstrict-aliasing
 
+# these are necessary for Clang ARM64 ART builds
+ifeq ($(ART_TARGET_CLANG), true)
+art_cflags += \
+	-Wno-implicit-exception-spec-mismatch \
+	-DNVALGRIND \
+	-Wno-unused-value
+endif
+
 ifeq ($(ART_SMALL_MODE),true)
   art_cflags += -DART_SMALL_MODE=1
 endif
@@ -209,7 +212,13 @@
 endif
 
 art_non_debug_cflags := \
-        -O3
+	-O3
+
+# FIXME: upstream LLVM has a vectorizer bug that needs to be fixed
+ifeq ($(ART_TARGET_CLANG),true)
+art_non_debug_cflags += \
+        -fno-vectorize
+endif
 
 art_debug_cflags := \
 	-O1 \
@@ -218,13 +227,21 @@
 
 ART_HOST_CFLAGS := $(art_cflags) -DANDROID_SMP=1 -DART_BASE_ADDRESS=$(LIBART_IMG_HOST_BASE_ADDRESS)
 ART_HOST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=default
+ART_HOST_CFLAGS += $(ART_DEFAULT_GC_TYPE_CFLAGS)
 
 ART_TARGET_CFLAGS := $(art_cflags) -DART_TARGET -DART_BASE_ADDRESS=$(LIBART_IMG_TARGET_BASE_ADDRESS)
 ifeq ($(TARGET_CPU_SMP),true)
   ART_TARGET_CFLAGS += -DANDROID_SMP=1
 else
-  ART_TARGET_CFLAGS += -DANDROID_SMP=0
+  ifeq ($(TARGET_CPU_SMP),false)
+    ART_TARGET_CFLAGS += -DANDROID_SMP=0
+  else
+    $(warning TARGET_CPU_SMP should be (true|false), found $(TARGET_CPU_SMP))
+    # Make sure we emit barriers for the worst case.
+    ART_TARGET_CFLAGS += -DANDROID_SMP=1
+  endif
 endif
+ART_TARGET_CFLAGS += $(ART_DEFAULT_GC_TYPE_CFLAGS)
 
 # DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES is set in ../build/core/dex_preopt.mk based on
 # the TARGET_CPU_VARIANT
@@ -238,8 +255,9 @@
 ifneq ($(filter 4.6 4.6.%, $(TARGET_GCC_VERSION)),)
   ART_TARGET_CFLAGS += -Wthread-safety
 else
+  # FIXME: add -Wthread-safety when the problem is fixed
   ifeq ($(ART_TARGET_CLANG),true)
-    ART_TARGET_CFLAGS += -Wthread-safety
+    ART_TARGET_CFLAGS +=
   else
     # Warn if -Wthread-safety is not suport and not doing a top-level or 'mma' build.
     ifneq ($(ONE_SHOT_MAKEFILE),)
@@ -304,7 +322,7 @@
 #         Has one argument, the suffix
 define call-art-multi-target
   $(call $(1),$(ART_PHONY_TEST_TARGET_SUFFIX))
-  
+
   ifdef TARGET_2ND_ARCH
     $(call $(1),$(2ND_ART_PHONY_TEST_TARGET_SUFFIX))
   endif
@@ -329,10 +347,10 @@
 #         Has one argument, the suffix
 define call-art-multi-target-var
   $(call $(1),$(ART_PHONY_TEST_TARGET_SUFFIX))
-  
+
   ifdef TARGET_2ND_ARCH
     $(call $(1),$(2ND_ART_PHONY_TEST_TARGET_SUFFIX))
-    
+
     # Link both together, if it makes sense
     ifneq ($(ART_PHONY_TEST_TARGET_SUFFIX),)
       ifneq ($(2ND_ART_PHONY_TEST_TARGET_SUFFIX),)
@@ -351,10 +369,10 @@
 #       We assume we can link the names together easily...
 define call-art-multi-target-rule
   $(call $(1),$(ART_PHONY_TEST_TARGET_SUFFIX))
-  
+
   ifdef TARGET_2ND_ARCH
     $(call $(1),$(2ND_ART_PHONY_TEST_TARGET_SUFFIX))
-  
+
     # Link both together, if it makes sense
     ifneq ($(ART_PHONY_TEST_TARGET_SUFFIX),)
       ifneq ($(2ND_ART_PHONY_TEST_TARGET_SUFFIX),)
@@ -365,5 +383,25 @@
   endif
 endef
 
+HOST_CORE_OAT := $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/core.oat
+TARGET_CORE_OAT := $(ART_TEST_DIR)/$(DEX2OAT_TARGET_ARCH)/core.oat
+ifdef TARGET_2ND_ARCH
+2ND_TARGET_CORE_OAT := $(2ND_ART_TEST_DIR)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)/core.oat
+endif
+
+HOST_CORE_OAT_OUT := $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/core.oat
+TARGET_CORE_OAT_OUT := $(ART_TEST_OUT)/$(DEX2OAT_TARGET_ARCH)/core.oat
+ifdef TARGET_2ND_ARCH
+2ND_TARGET_CORE_OAT_OUT := $(ART_TEST_OUT)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)/core.oat
+endif
+
+HOST_CORE_IMG_OUT := $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/core.art
+TARGET_CORE_IMG_OUT := $(ART_TEST_OUT)/$(DEX2OAT_TARGET_ARCH)/core.art
+ifdef TARGET_2ND_ARCH
+2ND_TARGET_CORE_IMG_OUT := $(ART_TEST_OUT)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)/core.art
+endif
+
+HOST_CORE_IMG_LOCATION := $(HOST_OUT_JAVA_LIBRARIES)/core.art
+TARGET_CORE_IMG_LOCATION := $(ART_TEST_OUT)/core.art
 
 endif # ANDROID_COMMON_MK
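
Note on the new core image variables: the *_OUT variables name the per-instruction-set files that dex2oat writes, while the *_LOCATION variables keep the arch-independent path handed to oatdump together with --instruction-set. A minimal sketch of how they resolve, assuming an arm64 primary and arm secondary target:

    TARGET_CORE_IMG_OUT      := $(ART_TEST_OUT)/arm64/core.art   # file produced for the primary arch
    2ND_TARGET_CORE_IMG_OUT  := $(ART_TEST_OUT)/arm/core.art     # file produced for the secondary arch
    TARGET_CORE_IMG_LOCATION := $(ART_TEST_OUT)/core.art         # arch-independent location for oatdump

The default garbage collector can likewise be overridden on the command line; for example, 'make ART_DEFAULT_GC_TYPE=SS ...' turns into -DART_DEFAULT_GC_TYPE_IS_SS via ART_DEFAULT_GC_TYPE_CFLAGS.
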
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index 6aa1c18..a186e85 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -56,7 +56,7 @@
   LOCAL_MODULE_TAGS := optional
   LOCAL_SRC_FILES := $$(art_source)
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime $$(art_c_includes)
-  LOCAL_SHARED_LIBRARIES += $$(art_shared_libraries) # libnativehelper
+  LOCAL_SHARED_LIBRARIES += $$(art_shared_libraries)
 
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := $$(art_executable)
@@ -99,13 +99,12 @@
     LOCAL_MULTILIB := $$(art_multilib)
   endif
 
+  include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
-    include art/build/Android.libcxx.mk
     include $(BUILD_EXECUTABLE)
     ART_TARGET_EXECUTABLES := $(ART_TARGET_EXECUTABLES) $(TARGET_OUT_EXECUTABLES)/$$(LOCAL_MODULE)
   else # host
     LOCAL_IS_HOST_MODULE := true
-    include art/build/Android.libcxx.mk
     include $(BUILD_HOST_EXECUTABLE)
     ART_HOST_EXECUTABLES := $(ART_HOST_EXECUTABLES) $(HOST_OUT_EXECUTABLES)/$$(LOCAL_MODULE)
   endif
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 429c523..20e6aad 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -47,6 +47,7 @@
 	runtime/gc/space/rosalloc_space_random_test.cc \
 	runtime/gc/space/large_object_space_test.cc \
 	runtime/gtest_test.cc \
+	runtime/handle_scope_test.cc \
 	runtime/indenter_test.cc \
 	runtime/indirect_reference_table_test.cc \
 	runtime/instruction_set_test.cc \
@@ -62,8 +63,7 @@
 	runtime/utils_test.cc \
 	runtime/verifier/method_verifier_test.cc \
 	runtime/verifier/reg_type_test.cc \
-	runtime/zip_archive_test.cc \
-	runtime/stack_indirect_reference_table_test.cc
+	runtime/zip_archive_test.cc
 
 COMPILER_GTEST_COMMON_SRC_FILES := \
 	runtime/jni_internal_test.cc \
@@ -78,6 +78,11 @@
 	compiler/oat_test.cc \
 	compiler/optimizing/codegen_test.cc \
 	compiler/optimizing/dominator_test.cc \
+	compiler/optimizing/find_loops_test.cc \
+	compiler/optimizing/linearize_test.cc \
+	compiler/optimizing/liveness_test.cc \
+	compiler/optimizing/live_ranges_test.cc \
+	compiler/optimizing/parallel_move_test.cc \
 	compiler/optimizing/pretty_printer_test.cc \
 	compiler/optimizing/ssa_test.cc \
 	compiler/output_stream_test.cc \
@@ -126,12 +131,12 @@
 # (1) Prefix for variables
 define build-art-test-make-target
 .PHONY: $$(art_gtest_target)$($(1)ART_PHONY_TEST_TARGET_SUFFIX)
-$$(art_gtest_target)$($(1)ART_PHONY_TEST_TARGET_SUFFIX): $($(1)ART_NATIVETEST_OUT)/$$(LOCAL_MODULE) test-art-target-sync
-	adb shell touch $($(1)ART_TEST_DIR)/$$@
-	adb shell rm $($(1)ART_TEST_DIR)/$$@
-	adb shell chmod 755 $($(1)ART_NATIVETEST_DIR)/$$(notdir $$<)
-	adb shell sh -c "$($(1)ART_NATIVETEST_DIR)/$$(notdir $$<) && touch $($(1)ART_TEST_DIR)/$$@"
-	$(hide) (adb pull $($(1)ART_TEST_DIR)/$$@ /tmp/ && echo $$@ PASSED) || (echo $$@ FAILED && exit 1)
+$$(art_gtest_target)$($(1)ART_PHONY_TEST_TARGET_SUFFIX): $(ART_NATIVETEST_OUT)/$(TARGET_$(1)ARCH)/$$(LOCAL_MODULE) test-art-target-sync
+	adb shell touch $(ART_TEST_DIR)/$(TARGET_$(1)ARCH)/$$@
+	adb shell rm $(ART_TEST_DIR)/$(TARGET_$(1)ARCH)/$$@
+	adb shell chmod 755 $(ART_NATIVETEST_DIR)/$(TARGET_$(1)ARCH)/$$(notdir $$<)
+	adb shell sh -c "$(ART_NATIVETEST_DIR)/$(TARGET_$(1)ARCH)/$$(notdir $$<) && touch $(ART_TEST_DIR)/$(TARGET_$(1)ARCH)/$$@"
+	$(hide) (adb pull $(ART_TEST_DIR)/$(TARGET_$(1)ARCH)/$$@ /tmp/ && echo $$@ PASSED) || (echo $$@ FAILED && exit 1)
 	$(hide) rm /tmp/$$@
 
   ART_TARGET_GTEST_TARGETS$($(1)ART_PHONY_TEST_TARGET_SUFFIX) += $$(art_gtest_target)$($(1)ART_PHONY_TEST_TARGET_SUFFIX)
@@ -178,25 +183,25 @@
   endif
 
   LOCAL_CFLAGS := $(ART_TEST_CFLAGS)
+  include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
     LOCAL_CLANG := $(ART_TARGET_CLANG)
     LOCAL_CFLAGS += $(ART_TARGET_CFLAGS) $(ART_TARGET_DEBUG_CFLAGS)
     LOCAL_CFLAGS_x86 := $(ART_TARGET_CFLAGS_x86)
     LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixl
-    LOCAL_STATIC_LIBRARIES += libgtest
-    LOCAL_MODULE_PATH_32 := $(ART_BASE_NATIVETEST_OUT)
-    LOCAL_MODULE_PATH_64 := $(ART_BASE_NATIVETEST_OUT)64
+    LOCAL_STATIC_LIBRARIES += libgtest_libc++
+    LOCAL_MODULE_PATH_32 := $(ART_NATIVETEST_OUT)/$(ART_TARGET_ARCH_32)
+    LOCAL_MODULE_PATH_64 := $(ART_NATIVETEST_OUT)/$(ART_TARGET_ARCH_64)
     LOCAL_MULTILIB := both
-    include art/build/Android.libcxx.mk
     include $(BUILD_EXECUTABLE)
     
-    ART_TARGET_GTEST_EXECUTABLES$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_NATIVETEST_OUT)/$$(LOCAL_MODULE)
+    ART_TARGET_GTEST_EXECUTABLES$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_NATIVETEST_OUT)/$(TARGET_ARCH)/$$(LOCAL_MODULE)
     art_gtest_target := test-art-$$(art_target_or_host)-gtest-$$(art_gtest_name)
 
     ifdef TARGET_2ND_ARCH
       $(call build-art-test-make-target,2ND_)
 
-      ART_TARGET_GTEST_EXECUTABLES$(2ND_ART_PHONY_TEST_TARGET_SUFFIX) += $(2ND_ART_NATIVETEST_OUT)/$$(LOCAL_MODULE)
+      ART_TARGET_GTEST_EXECUTABLES$(2ND_ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_NATIVETEST_OUT)/$(TARGET_2ND_ARCH)/$$(LOCAL_MODULE)
 
       # Bind the primary to the non-suffix rule
       ifneq ($(ART_PHONY_TEST_TARGET_SUFFIX),)
@@ -212,11 +217,10 @@
     LOCAL_STATIC_LIBRARIES += libcutils libvixl
     ifneq ($(WITHOUT_HOST_CLANG),true)
         # GCC host compiled tests fail with this linked, presumably due to destructors that run.
-        LOCAL_STATIC_LIBRARIES += libgtest_host
+        LOCAL_STATIC_LIBRARIES += libgtest_libc++_host
     endif
     LOCAL_LDLIBS += -lpthread -ldl
     LOCAL_IS_HOST_MODULE := true
-    include art/build/Android.libcxx.mk
     include $(BUILD_HOST_EXECUTABLE)
     art_gtest_exe := $(HOST_OUT_EXECUTABLES)/$$(LOCAL_MODULE)
     ART_HOST_GTEST_EXECUTABLES += $$(art_gtest_exe)
@@ -247,3 +251,9 @@
     $(foreach file,$(COMPILER_GTEST_HOST_SRC_FILES), $(eval $(call build-art-test,host,$(file),art/compiler,libartd-compiler)))
   endif
 endif
+
+# Used outside the art project to get a list of the current tests
+RUNTIME_TARGET_GTEST_MAKE_TARGETS :=
+$(foreach file, $(RUNTIME_GTEST_TARGET_SRC_FILES), $(eval RUNTIME_TARGET_GTEST_MAKE_TARGETS += $$(notdir $$(basename $$(file)))))
+COMPILER_TARGET_GTEST_MAKE_TARGETS :=
+$(foreach file, $(COMPILER_GTEST_TARGET_SRC_FILES), $(eval COMPILER_TARGET_GTEST_MAKE_TARGETS += $$(notdir $$(basename $$(file)))))
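
The two foreach loops above expose just the gtest binary names to makefiles outside the art project. A minimal sketch for one entry, assuming runtime/handle_scope_test.cc ends up in RUNTIME_GTEST_TARGET_SRC_FILES:

    # $(basename runtime/handle_scope_test.cc) -> runtime/handle_scope_test
    # $(notdir runtime/handle_scope_test)      -> handle_scope_test
    RUNTIME_TARGET_GTEST_MAKE_TARGETS += handle_scope_test
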
diff --git a/build/Android.libarttest.mk b/build/Android.libarttest.mk
index 18d321a..9e5f3d6 100644
--- a/build/Android.libarttest.mk
+++ b/build/Android.libarttest.mk
@@ -20,6 +20,11 @@
 	test/StackWalk/stack_walk_jni.cc \
 	test/UnsafeTest/unsafe_test.cc
 
+ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
+ifdef TARGET_2ND_ARCH
+  ART_TARGET_LIBARTTEST_$(2ND_ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TEST_OUT)/$(TARGET_2ND_ARCH)/libarttest.so
+endif
+
 # $(1): target or host
 define build-libarttest
   ifneq ($(1),target)
@@ -41,6 +46,7 @@
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime
   LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/build/Android.common.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/build/Android.libarttest.mk
+  include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
     LOCAL_CLANG := $(ART_TARGET_CLANG)
     LOCAL_CFLAGS := $(ART_TARGET_CFLAGS) $(ART_TARGET_DEBUG_CFLAGS)
@@ -48,21 +54,19 @@
     LOCAL_SHARED_LIBRARIES += libdl libcutils
     LOCAL_STATIC_LIBRARIES := libgtest
     LOCAL_MULTILIB := both
-    LOCAL_MODULE_PATH_32 := $(ART_BASE_TEST_OUT)
-    LOCAL_MODULE_PATH_64 := $(ART_BASE_TEST_OUT)64
+    LOCAL_MODULE_PATH_32 := $(ART_TEST_OUT)/$(ART_TARGET_ARCH_32)
+    LOCAL_MODULE_PATH_64 := $(ART_TEST_OUT)/$(ART_TARGET_ARCH_64)
     LOCAL_MODULE_TARGET_ARCH := $(ART_SUPPORTED_ARCH)
-    include art/build/Android.libcxx.mk
     include $(BUILD_SHARED_LIBRARY)
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS)
     LOCAL_STATIC_LIBRARIES := libcutils
-    LOCAL_LDLIBS := -ldl -lpthread
+    LOCAL_LDLIBS += -ldl -lpthread
     ifeq ($(HOST_OS),linux)
       LOCAL_LDLIBS += -lrt
     endif
     LOCAL_IS_HOST_MODULE := true
-    include art/build/Android.libcxx.mk
     include $(BUILD_HOST_SHARED_LIBRARY)
   endif
 endef
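
The suffixed ART_TARGET_LIBARTTEST_* variables defined above are what the test-art-target-dependencies$(1) rule in Android.mk now depends on. A minimal sketch of how they resolve on a 64/32 multi-arch build, using the suffix values from build/Android.common.mk:

    # ART_PHONY_TEST_TARGET_SUFFIX = 64, 2ND_ART_PHONY_TEST_TARGET_SUFFIX = 32
    ART_TARGET_LIBARTTEST_64 := $(ART_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
    ART_TARGET_LIBARTTEST_32 := $(ART_TEST_OUT)/$(TARGET_2ND_ARCH)/libarttest.so
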
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 9d7579d..bf07ecc 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -29,18 +29,6 @@
 HOST_CORE_DEX_FILES   := $(foreach jar,$(HOST_CORE_JARS),  $(call intermediates-dir-for,JAVA_LIBRARIES,$(jar),t,COMMON)/javalib.jar)
 TARGET_CORE_DEX_FILES := $(foreach jar,$(TARGET_CORE_JARS),$(call intermediates-dir-for,JAVA_LIBRARIES,$(jar), ,COMMON)/javalib.jar)
 
-HOST_CORE_OAT := $(HOST_OUT_JAVA_LIBRARIES)/core.oat
-TARGET_CORE_OAT := $(ART_TEST_DIR)/core.oat
-2ND_TARGET_CORE_OAT := $(2ND_ART_TEST_DIR)/core.oat
-
-HOST_CORE_OAT_OUT := $(HOST_OUT_JAVA_LIBRARIES)/core.oat
-TARGET_CORE_OAT_OUT := $(ART_TEST_OUT)/core.oat
-2ND_TARGET_CORE_OAT_OUT := $(2ND_ART_TEST_OUT)/core.oat
-
-HOST_CORE_IMG_OUT := $(HOST_OUT_JAVA_LIBRARIES)/core.art
-TARGET_CORE_IMG_OUT := $(ART_TEST_OUT)/core.art
-2ND_TARGET_CORE_IMG_OUT := $(2ND_ART_TEST_OUT)/core.art
-
 TARGET_INSTRUCTION_SET_FEATURES := $(DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES)
 
 # Use dex2oat debug version for better error reporting
diff --git a/compiler/Android.mk b/compiler/Android.mk
index e3201e7..021392c 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -27,6 +27,12 @@
 	dex/quick/arm/int_arm.cc \
 	dex/quick/arm/target_arm.cc \
 	dex/quick/arm/utility_arm.cc \
+	dex/quick/arm64/assemble_arm64.cc \
+	dex/quick/arm64/call_arm64.cc \
+	dex/quick/arm64/fp_arm64.cc \
+	dex/quick/arm64/int_arm64.cc \
+	dex/quick/arm64/target_arm64.cc \
+	dex/quick/arm64/utility_arm64.cc \
 	dex/quick/codegen_util.cc \
 	dex/quick/dex_file_method_inliner.cc \
 	dex/quick/dex_file_to_method_inliner_map.cc \
@@ -53,9 +59,8 @@
 	dex/mir_field_info.cc \
 	dex/mir_method_info.cc \
 	dex/mir_optimization.cc \
-	dex/pass_driver.cc \
 	dex/bb_optimizations.cc \
-	dex/bit_vector_block_iterator.cc \
+	dex/pass_driver_me.cc \
 	dex/frontend.cc \
 	dex/mir_graph.cc \
 	dex/mir_analysis.cc \
@@ -76,9 +81,13 @@
 	optimizing/code_generator.cc \
 	optimizing/code_generator_arm.cc \
 	optimizing/code_generator_x86.cc \
+	optimizing/graph_visualizer.cc \
+	optimizing/locations.cc \
 	optimizing/nodes.cc \
 	optimizing/optimizing_compiler.cc \
+	optimizing/parallel_move_resolver.cc \
 	optimizing/ssa_builder.cc \
+	optimizing/ssa_liveness_analysis.cc \
 	trampolines/trampoline_compiler.cc \
 	utils/arena_allocator.cc \
 	utils/arena_bit_vector.cc \
@@ -162,7 +171,6 @@
   ifeq ($$(art_target_or_host),host)
     LOCAL_IS_HOST_MODULE := true
   endif
-  include art/build/Android.libcxx.mk
   LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := libart-compiler
@@ -187,6 +195,7 @@
   LOCAL_GENERATED_SOURCES += $$(ENUM_OPERATOR_OUT_GEN)
 
   LOCAL_CFLAGS := $$(LIBART_COMPILER_CFLAGS)
+  include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
     LOCAL_CLANG := $(ART_TARGET_CLANG)
     LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
@@ -239,7 +248,7 @@
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime
 
   ifeq ($$(art_target_or_host),host)
-    LOCAL_LDLIBS := -ldl -lpthread
+    LOCAL_LDLIBS += -ldl -lpthread
   endif
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 8bba84a..5050d4e 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -130,13 +130,13 @@
   return result;
 }
 
+// Normally the ClassLinker supplies this.
+extern "C" void art_quick_generic_jni_trampoline(mirror::ArtMethod*);
+
 class CommonCompilerTest : public CommonRuntimeTest {
  public:
   // Create an OatMethod based on pointers (for unit tests).
   OatFile::OatMethod CreateOatMethod(const void* code,
-                                     const size_t frame_size_in_bytes,
-                                     const uint32_t core_spill_mask,
-                                     const uint32_t fp_spill_mask,
                                      const uint8_t* gc_map) {
     CHECK(code != nullptr);
     const byte* base;
@@ -154,9 +154,6 @@
     }
     return OatFile::OatMethod(base,
                               code_offset,
-                              frame_size_in_bytes,
-                              core_spill_mask,
-                              fp_spill_mask,
                               gc_map_offset);
   }
 
@@ -179,11 +176,14 @@
         CHECK_NE(0u, code_size);
         const std::vector<uint8_t>& vmap_table = compiled_method->GetVmapTable();
         uint32_t vmap_table_offset = vmap_table.empty() ? 0u
-            : sizeof(OatMethodHeader) + vmap_table.size();
+            : sizeof(OatQuickMethodHeader) + vmap_table.size();
         const std::vector<uint8_t>& mapping_table = compiled_method->GetMappingTable();
         uint32_t mapping_table_offset = mapping_table.empty() ? 0u
-            : sizeof(OatMethodHeader) + vmap_table.size() + mapping_table.size();
-        OatMethodHeader method_header(vmap_table_offset, mapping_table_offset, code_size);
+            : sizeof(OatQuickMethodHeader) + vmap_table.size() + mapping_table.size();
+        OatQuickMethodHeader method_header(mapping_table_offset, vmap_table_offset,
+                                           compiled_method->GetFrameSizeInBytes(),
+                                           compiled_method->GetCoreSpillMask(),
+                                           compiled_method->GetFpSpillMask(), code_size);
 
         header_code_and_maps_chunks_.push_back(std::vector<uint8_t>());
         std::vector<uint8_t>* chunk = &header_code_and_maps_chunks_.back();
@@ -207,11 +207,7 @@
       const void* method_code = CompiledMethod::CodePointer(code_ptr,
                                                             compiled_method->GetInstructionSet());
       LOG(INFO) << "MakeExecutable " << PrettyMethod(method) << " code=" << method_code;
-      OatFile::OatMethod oat_method = CreateOatMethod(method_code,
-                                                      compiled_method->GetFrameSizeInBytes(),
-                                                      compiled_method->GetCoreSpillMask(),
-                                                      compiled_method->GetFpSpillMask(),
-                                                      nullptr);
+      OatFile::OatMethod oat_method = CreateOatMethod(method_code, nullptr);
       oat_method.LinkMethod(method);
       method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
     } else {
@@ -220,28 +216,13 @@
       if (!method->IsNative()) {
         const void* method_code = kUsePortableCompiler ? GetPortableToInterpreterBridge()
                                                        : GetQuickToInterpreterBridge();
-        OatFile::OatMethod oat_method = CreateOatMethod(method_code,
-                                                        kStackAlignment,
-                                                        0,
-                                                        0,
-                                                        nullptr);
+        OatFile::OatMethod oat_method = CreateOatMethod(method_code, nullptr);
         oat_method.LinkMethod(method);
         method->SetEntryPointFromInterpreter(interpreter::artInterpreterToInterpreterBridge);
       } else {
-        const void* method_code = GetQuickGenericJniTrampoline();
-        mirror::ArtMethod* callee_save_method = runtime_->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
+        const void* method_code = reinterpret_cast<void*>(art_quick_generic_jni_trampoline);
 
-        // Compute Sirt size, as Sirt goes into frame
-        MethodHelper mh(method);
-        uint32_t sirt_refs = mh.GetNumberOfReferenceArgsWithoutReceiver() + 1;
-        uint32_t sirt_size = StackIndirectReferenceTable::SizeOf(sirt_refs);
-
-        OatFile::OatMethod oat_method = CreateOatMethod(method_code,
-                                                        callee_save_method->GetFrameSizeInBytes() +
-                                                            sirt_size,
-                                                        callee_save_method->GetCoreSpillMask(),
-                                                        callee_save_method->GetFpSpillMask(),
-                                                        nullptr);
+        OatFile::OatMethod oat_method = CreateOatMethod(method_code, nullptr);
         oat_method.LinkMethod(method);
         method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
       }
@@ -279,7 +260,8 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     std::string class_descriptor(DotToDescriptor(class_name));
     Thread* self = Thread::Current();
-    SirtRef<mirror::ClassLoader> loader(self, class_loader);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ClassLoader> loader(hs.NewHandle(class_loader));
     mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader);
     CHECK(klass != nullptr) << "Class not found " << class_name;
     for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
@@ -323,11 +305,12 @@
       compiler_options_->SetCompilerFilter(CompilerOptions::kInterpretOnly);
 #endif
 
+      runtime_->SetInstructionSet(instruction_set);
       for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
         Runtime::CalleeSaveType type = Runtime::CalleeSaveType(i);
         if (!runtime_->HasCalleeSaveMethod(type)) {
           runtime_->SetCalleeSaveMethod(
-              runtime_->CreateCalleeSaveMethod(instruction_set, type), type);
+              runtime_->CreateCalleeSaveMethod(type), type);
         }
       }
 
@@ -373,7 +356,8 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     std::string class_descriptor(DotToDescriptor(class_name));
     Thread* self = Thread::Current();
-    SirtRef<mirror::ClassLoader> loader(self, class_loader);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ClassLoader> loader(hs.NewHandle(class_loader));
     mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader);
     CHECK(klass != nullptr) << "Class not found " << class_name;
     for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
@@ -393,7 +377,7 @@
     timings.EndSplit();
   }
 
-  void CompileDirectMethod(SirtRef<mirror::ClassLoader>& class_loader, const char* class_name,
+  void CompileDirectMethod(Handle<mirror::ClassLoader> class_loader, const char* class_name,
                            const char* method_name, const char* signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     std::string class_descriptor(DotToDescriptor(class_name));
@@ -406,7 +390,7 @@
     CompileMethod(method);
   }
 
-  void CompileVirtualMethod(SirtRef<mirror::ClassLoader>& class_loader, const char* class_name,
+  void CompileVirtualMethod(Handle<mirror::ClassLoader> class_loader, const char* class_name,
                             const char* method_name, const char* signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     std::string class_descriptor(DotToDescriptor(class_name));
@@ -436,18 +420,18 @@
     image_reservation_.reset();
   }
 
-  UniquePtr<CompilerOptions> compiler_options_;
-  UniquePtr<VerificationResults> verification_results_;
-  UniquePtr<DexFileToMethodInlinerMap> method_inliner_map_;
-  UniquePtr<CompilerCallbacksImpl> callbacks_;
-  UniquePtr<CompilerDriver> compiler_driver_;
-  UniquePtr<CumulativeLogger> timer_;
+  std::unique_ptr<CompilerOptions> compiler_options_;
+  std::unique_ptr<VerificationResults> verification_results_;
+  std::unique_ptr<DexFileToMethodInlinerMap> method_inliner_map_;
+  std::unique_ptr<CompilerCallbacksImpl> callbacks_;
+  std::unique_ptr<CompilerDriver> compiler_driver_;
+  std::unique_ptr<CumulativeLogger> timer_;
 
  private:
-  UniquePtr<MemMap> image_reservation_;
+  std::unique_ptr<MemMap> image_reservation_;
 
   // Chunks must not move their storage after being created - use the node-based std::list.
-  std::list<std::vector<uint8_t> > header_code_and_maps_chunks_;
+  std::list<std::vector<uint8_t>> header_code_and_maps_chunks_;
 };
 
 }  // namespace art
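
The SirtRef to StackHandleScope/Handle conversion above is the pattern used throughout this change. A minimal sketch of the new idiom, restating the hunk in isolation (assumes the usual ART runtime headers):

    // One StackHandleScope per scope, sized for the number of handles it must hold.
    Thread* self = Thread::Current();
    StackHandleScope<1> hs(self);
    Handle<mirror::ClassLoader> loader(hs.NewHandle(class_loader));
    // The handle keeps the ClassLoader visible to the GC across the call below.
    mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader);
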
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 59ed827..7441dac 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -138,7 +138,7 @@
   oatdata_offsets_to_compiled_code_offset_.push_back(offset);
 }
 
-CompiledMethod::CompiledMethod(CompilerDriver& driver,
+CompiledMethod::CompiledMethod(CompilerDriver* driver,
                                InstructionSet instruction_set,
                                const std::vector<uint8_t>& quick_code,
                                const size_t frame_size_in_bytes,
@@ -148,48 +148,48 @@
                                const std::vector<uint8_t>& vmap_table,
                                const std::vector<uint8_t>& native_gc_map,
                                const std::vector<uint8_t>* cfi_info)
-    : CompiledCode(&driver, instruction_set, quick_code), frame_size_in_bytes_(frame_size_in_bytes),
+    : CompiledCode(driver, instruction_set, quick_code), frame_size_in_bytes_(frame_size_in_bytes),
       core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask),
-  mapping_table_(driver.DeduplicateMappingTable(mapping_table)),
-  vmap_table_(driver.DeduplicateVMapTable(vmap_table)),
-  gc_map_(driver.DeduplicateGCMap(native_gc_map)),
-  cfi_info_(driver.DeduplicateCFIInfo(cfi_info)) {
+  mapping_table_(driver->DeduplicateMappingTable(mapping_table)),
+  vmap_table_(driver->DeduplicateVMapTable(vmap_table)),
+  gc_map_(driver->DeduplicateGCMap(native_gc_map)),
+  cfi_info_(driver->DeduplicateCFIInfo(cfi_info)) {
 }
 
-CompiledMethod::CompiledMethod(CompilerDriver& driver,
+CompiledMethod::CompiledMethod(CompilerDriver* driver,
                                InstructionSet instruction_set,
                                const std::vector<uint8_t>& code,
                                const size_t frame_size_in_bytes,
                                const uint32_t core_spill_mask,
                                const uint32_t fp_spill_mask)
-    : CompiledCode(&driver, instruction_set, code),
+    : CompiledCode(driver, instruction_set, code),
       frame_size_in_bytes_(frame_size_in_bytes),
       core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask),
-      mapping_table_(driver.DeduplicateMappingTable(std::vector<uint8_t>())),
-      vmap_table_(driver.DeduplicateVMapTable(std::vector<uint8_t>())),
-      gc_map_(driver.DeduplicateGCMap(std::vector<uint8_t>())),
+      mapping_table_(driver->DeduplicateMappingTable(std::vector<uint8_t>())),
+      vmap_table_(driver->DeduplicateVMapTable(std::vector<uint8_t>())),
+      gc_map_(driver->DeduplicateGCMap(std::vector<uint8_t>())),
       cfi_info_(nullptr) {
 }
 
 // Constructs a CompiledMethod for the Portable compiler.
-CompiledMethod::CompiledMethod(CompilerDriver& driver, InstructionSet instruction_set,
+CompiledMethod::CompiledMethod(CompilerDriver* driver, InstructionSet instruction_set,
                                const std::string& code, const std::vector<uint8_t>& gc_map,
                                const std::string& symbol)
-    : CompiledCode(&driver, instruction_set, code, symbol),
+    : CompiledCode(driver, instruction_set, code, symbol),
       frame_size_in_bytes_(kStackAlignment), core_spill_mask_(0),
-      fp_spill_mask_(0), gc_map_(driver.DeduplicateGCMap(gc_map)) {
-  mapping_table_ = driver.DeduplicateMappingTable(std::vector<uint8_t>());
-  vmap_table_ = driver.DeduplicateVMapTable(std::vector<uint8_t>());
+      fp_spill_mask_(0), gc_map_(driver->DeduplicateGCMap(gc_map)) {
+  mapping_table_ = driver->DeduplicateMappingTable(std::vector<uint8_t>());
+  vmap_table_ = driver->DeduplicateVMapTable(std::vector<uint8_t>());
 }
 
-CompiledMethod::CompiledMethod(CompilerDriver& driver, InstructionSet instruction_set,
+CompiledMethod::CompiledMethod(CompilerDriver* driver, InstructionSet instruction_set,
                                const std::string& code, const std::string& symbol)
-    : CompiledCode(&driver, instruction_set, code, symbol),
+    : CompiledCode(driver, instruction_set, code, symbol),
       frame_size_in_bytes_(kStackAlignment), core_spill_mask_(0),
       fp_spill_mask_(0) {
-  mapping_table_ = driver.DeduplicateMappingTable(std::vector<uint8_t>());
-  vmap_table_ = driver.DeduplicateVMapTable(std::vector<uint8_t>());
-  gc_map_ = driver.DeduplicateGCMap(std::vector<uint8_t>());
+  mapping_table_ = driver->DeduplicateMappingTable(std::vector<uint8_t>());
+  vmap_table_ = driver->DeduplicateVMapTable(std::vector<uint8_t>());
+  gc_map_ = driver->DeduplicateGCMap(std::vector<uint8_t>());
 }
 
 }  // namespace art
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 90ae6ee..23cd250 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -17,12 +17,12 @@
 #ifndef ART_COMPILER_COMPILED_METHOD_H_
 #define ART_COMPILER_COMPILED_METHOD_H_
 
+#include <memory>
 #include <string>
 #include <vector>
 
 #include "instruction_set.h"
 #include "utils.h"
-#include "UniquePtr.h"
 
 namespace llvm {
   class Function;
@@ -102,7 +102,7 @@
 class CompiledMethod : public CompiledCode {
  public:
   // Constructs a CompiledMethod for the non-LLVM compilers.
-  CompiledMethod(CompilerDriver& driver,
+  CompiledMethod(CompilerDriver* driver,
                  InstructionSet instruction_set,
                  const std::vector<uint8_t>& quick_code,
                  const size_t frame_size_in_bytes,
@@ -114,7 +114,7 @@
                  const std::vector<uint8_t>* cfi_info);
 
   // Constructs a CompiledMethod for the QuickJniCompiler.
-  CompiledMethod(CompilerDriver& driver,
+  CompiledMethod(CompilerDriver* driver,
                  InstructionSet instruction_set,
                  const std::vector<uint8_t>& quick_code,
                  const size_t frame_size_in_bytes,
@@ -122,11 +122,11 @@
                  const uint32_t fp_spill_mask);
 
   // Constructs a CompiledMethod for the Portable compiler.
-  CompiledMethod(CompilerDriver& driver, InstructionSet instruction_set, const std::string& code,
+  CompiledMethod(CompilerDriver* driver, InstructionSet instruction_set, const std::string& code,
                  const std::vector<uint8_t>& gc_map, const std::string& symbol);
 
   // Constructs a CompiledMethod for the Portable JniCompiler.
-  CompiledMethod(CompilerDriver& driver, InstructionSet instruction_set, const std::string& code,
+  CompiledMethod(CompilerDriver* driver, InstructionSet instruction_set, const std::string& code,
                  const std::string& symbol);
 
   ~CompiledMethod() {}
diff --git a/compiler/compiler.cc b/compiler/compiler.cc
index c88c38e..a832c31 100644
--- a/compiler/compiler.cc
+++ b/compiler/compiler.cc
@@ -27,8 +27,7 @@
 namespace art {
 
 #ifdef ART_SEA_IR_MODE
-extern "C" art::CompiledMethod* SeaIrCompileMethod(art::CompilerDriver& driver,
-                                                   const art::DexFile::CodeItem* code_item,
+extern "C" art::CompiledMethod* SeaIrCompileMethod(const art::DexFile::CodeItem* code_item,
                                                    uint32_t access_flags,
                                                    art::InvokeType invoke_type,
                                                    uint16_t class_def_idx,
@@ -38,8 +37,7 @@
 #endif
 
 
-CompiledMethod* Compiler::TryCompileWithSeaIR(art::CompilerDriver& driver,
-                                              const art::DexFile::CodeItem* code_item,
+CompiledMethod* Compiler::TryCompileWithSeaIR(const art::DexFile::CodeItem* code_item,
                                               uint32_t access_flags,
                                               art::InvokeType invoke_type,
                                               uint16_t class_def_idx,
@@ -47,13 +45,10 @@
                                               jobject class_loader,
                                               const art::DexFile& dex_file) {
 #ifdef ART_SEA_IR_MODE
-    bool use_sea = Runtime::Current()->IsSeaIRMode();
-    use_sea = use_sea &&
-        (std::string::npos != PrettyMethod(method_idx, dex_file).find("fibonacci"));
+    bool use_sea = (std::string::npos != PrettyMethod(method_idx, dex_file).find("fibonacci"));
     if (use_sea) {
       LOG(INFO) << "Using SEA IR to compile..." << std::endl;
-      return SeaIrCompileMethod(compiler,
-                                code_item,
+      return SeaIrCompileMethod(code_item,
                                 access_flags,
                                 invoke_type,
                                 class_def_idx,
@@ -68,11 +63,11 @@
 
 #ifdef ART_USE_PORTABLE_COMPILER
 
-extern "C" void ArtInitCompilerContext(art::CompilerDriver& driver);
+extern "C" void ArtInitCompilerContext(art::CompilerDriver* driver);
 
-extern "C" void ArtUnInitCompilerContext(art::CompilerDriver& driver);
+extern "C" void ArtUnInitCompilerContext(art::CompilerDriver* driver);
 
-extern "C" art::CompiledMethod* ArtCompileMethod(art::CompilerDriver& driver,
+extern "C" art::CompiledMethod* ArtCompileMethod(art::CompilerDriver* driver,
                                                  const art::DexFile::CodeItem* code_item,
                                                  uint32_t access_flags,
                                                  art::InvokeType invoke_type,
@@ -81,45 +76,45 @@
                                                  jobject class_loader,
                                                  const art::DexFile& dex_file);
 
-extern "C" art::CompiledMethod* ArtLLVMJniCompileMethod(art::CompilerDriver& driver,
+extern "C" art::CompiledMethod* ArtLLVMJniCompileMethod(art::CompilerDriver* driver,
                                                         uint32_t access_flags, uint32_t method_idx,
                                                         const art::DexFile& dex_file);
 
-extern "C" void compilerLLVMSetBitcodeFileName(art::CompilerDriver& driver,
+extern "C" void compilerLLVMSetBitcodeFileName(art::CompilerDriver* driver,
                                                std::string const& filename);
 
 
-class LLVMCompiler : public Compiler {
+class LLVMCompiler FINAL : public Compiler {
  public:
-  LLVMCompiler() : Compiler(1000) {}
+  explicit LLVMCompiler(CompilerDriver* driver) : Compiler(driver, 1000) {}
 
-  void Init(CompilerDriver& driver) const {
-    ArtInitCompilerContext(driver);
+  void Init() const OVERRIDE {
+    ArtInitCompilerContext(GetCompilerDriver());
   }
 
-  void UnInit(CompilerDriver& driver) const {
-    ArtUnInitCompilerContext(driver);
+  void UnInit() const OVERRIDE {
+    ArtUnInitCompilerContext(GetCompilerDriver());
   }
 
-  CompiledMethod* Compile(CompilerDriver& driver,
-                          const DexFile::CodeItem* code_item,
+  CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                           uint32_t access_flags,
                           InvokeType invoke_type,
                           uint16_t class_def_idx,
                           uint32_t method_idx,
                           jobject class_loader,
-                          const DexFile& dex_file) const {
-    CompiledMethod* method = TryCompileWithSeaIR(driver,
-                                                 code_item,
+                          const DexFile& dex_file) const OVERRIDE {
+    CompiledMethod* method = TryCompileWithSeaIR(code_item,
                                                  access_flags,
                                                  invoke_type,
                                                  class_def_idx,
                                                  method_idx,
                                                  class_loader,
                                                  dex_file);
-    if (method != nullptr) return method;
+    if (method != nullptr) {
+      return method;
+    }
 
-    return ArtCompileMethod(compiler,
+    return ArtCompileMethod(GetCompilerDriver(),
                             code_item,
                             access_flags,
                             invoke_type,
@@ -129,11 +124,10 @@
                             dex_file);
   }
 
-  CompiledMethod* JniCompile(CompilerDriver& driver,
-                             uint32_t access_flags,
+  CompiledMethod* JniCompile(uint32_t access_flags,
                              uint32_t method_idx,
-                             const DexFile& dex_file) const {
-    return ArtLLVMJniCompileMethod(driver, access_flags, method_idx, dex_file);
+                             const DexFile& dex_file) const OVERRIDE {
+    return ArtLLVMJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file);
   }
 
   uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const {
@@ -182,17 +176,17 @@
 };
 #endif
 
-Compiler* Compiler::Create(Compiler::Kind kind) {
+Compiler* Compiler::Create(CompilerDriver* driver, Compiler::Kind kind) {
   switch (kind) {
     case kQuick:
-      return new QuickCompiler();
+      return new QuickCompiler(driver);
       break;
     case kOptimizing:
-      return new OptimizingCompiler();
+      return new OptimizingCompiler(driver);
       break;
     case kPortable:
 #ifdef ART_USE_PORTABLE_COMPILER
-      return new LLVMCompiler();
+      return new LLVMCompiler(driver);
 #else
       LOG(FATAL) << "Portable compiler not compiled";
 #endif
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 2357297..4caebf3 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -41,18 +41,13 @@
     kPortable
   };
 
-  explicit Compiler(uint64_t warning)
-      : maximum_compilation_time_before_warning_(warning) {
-  }
+  static Compiler* Create(CompilerDriver* driver, Kind kind);
 
-  static Compiler* Create(Kind kind);
+  virtual void Init() const = 0;
 
-  virtual void Init(CompilerDriver& driver) const = 0;
+  virtual void UnInit() const = 0;
 
-  virtual void UnInit(CompilerDriver& driver) const = 0;
-
-  virtual CompiledMethod* Compile(CompilerDriver& driver,
-                                  const DexFile::CodeItem* code_item,
+  virtual CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                                   uint32_t access_flags,
                                   InvokeType invoke_type,
                                   uint16_t class_def_idx,
@@ -60,8 +55,7 @@
                                   jobject class_loader,
                                   const DexFile& dex_file) const = 0;
 
-  static CompiledMethod* TryCompileWithSeaIR(art::CompilerDriver& driver,
-                                             const art::DexFile::CodeItem* code_item,
+  static CompiledMethod* TryCompileWithSeaIR(const art::DexFile::CodeItem* code_item,
                                              uint32_t access_flags,
                                              art::InvokeType invoke_type,
                                              uint16_t class_def_idx,
@@ -69,8 +63,7 @@
                                              jobject class_loader,
                                              const art::DexFile& dex_file);
 
-  virtual CompiledMethod* JniCompile(CompilerDriver& driver,
-                                     uint32_t access_flags,
+  virtual CompiledMethod* JniCompile(uint32_t access_flags,
                                      uint32_t method_idx,
                                      const DexFile& dex_file) const = 0;
 
@@ -81,11 +74,10 @@
                         OatWriter* oat_writer,
                         const std::vector<const art::DexFile*>& dex_files,
                         const std::string& android_root,
-                        bool is_host, const CompilerDriver& driver) const
+                        bool is_host) const
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
-  virtual Backend* GetCodeGenerator(CompilationUnit* cu,
-                                    void* compilation_unit) const = 0;
+  virtual Backend* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const = 0;
 
   uint64_t GetMaximumCompilationTimeBeforeWarning() const {
     return maximum_compilation_time_before_warning_;
@@ -117,7 +109,17 @@
     return nullptr;
   }
 
+ protected:
+  explicit Compiler(CompilerDriver* driver, uint64_t warning) :
+      driver_(driver), maximum_compilation_time_before_warning_(warning) {
+  }
+
+  CompilerDriver* GetCompilerDriver() const {
+    return driver_;
+  }
+
  private:
+  CompilerDriver* const driver_;
   const uint64_t maximum_compilation_time_before_warning_;
 
   DISALLOW_COPY_AND_ASSIGN(Compiler);
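
With the CompilerDriver now stored in the Compiler base class, a backend takes the driver once in its constructor and reaches it through GetCompilerDriver() instead of accepting a CompilerDriver& in every virtual. A minimal sketch of the resulting shape, modeled on the QuickCompiler changes below (class name illustrative, remaining virtuals elided):

    class ExampleCompiler FINAL : public Compiler {
     public:
      // 1000 is the compilation-time warning threshold forwarded to the base class.
      explicit ExampleCompiler(CompilerDriver* driver) : Compiler(driver, 1000) {}

      void Init() const OVERRIDE {
        // No driver parameter any more; fetch it from the base class.
        ArtInitQuickCompilerContext(GetCompilerDriver());
      }
      // Compile, JniCompile, WriteElf, ... follow the same pattern.
    };
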
diff --git a/compiler/compilers.cc b/compiler/compilers.cc
index 1237e70..76838d7 100644
--- a/compiler/compilers.cc
+++ b/compiler/compilers.cc
@@ -22,9 +22,9 @@
 
 namespace art {
 
-extern "C" void ArtInitQuickCompilerContext(art::CompilerDriver& driver);
-extern "C" void ArtUnInitQuickCompilerContext(art::CompilerDriver& driver);
-extern "C" art::CompiledMethod* ArtQuickCompileMethod(art::CompilerDriver& driver,
+extern "C" void ArtInitQuickCompilerContext(art::CompilerDriver* driver);
+extern "C" void ArtUnInitQuickCompilerContext(art::CompilerDriver* driver);
+extern "C" art::CompiledMethod* ArtQuickCompileMethod(art::CompilerDriver* driver,
                                                       const art::DexFile::CodeItem* code_item,
                                                       uint32_t access_flags,
                                                       art::InvokeType invoke_type,
@@ -33,40 +33,40 @@
                                                       jobject class_loader,
                                                       const art::DexFile& dex_file);
 
-extern "C" art::CompiledMethod* ArtQuickJniCompileMethod(art::CompilerDriver& driver,
+extern "C" art::CompiledMethod* ArtQuickJniCompileMethod(art::CompilerDriver* driver,
                                                          uint32_t access_flags, uint32_t method_idx,
                                                          const art::DexFile& dex_file);
 
 // Hack for CFI CIE initialization
 extern std::vector<uint8_t>* X86CFIInitialization();
 
-void QuickCompiler::Init(CompilerDriver& driver) const {
-  ArtInitQuickCompilerContext(driver);
+void QuickCompiler::Init() const {
+  ArtInitQuickCompilerContext(GetCompilerDriver());
 }
 
-void QuickCompiler::UnInit(CompilerDriver& driver) const {
-  ArtUnInitQuickCompilerContext(driver);
+void QuickCompiler::UnInit() const {
+  ArtUnInitQuickCompilerContext(GetCompilerDriver());
 }
 
-CompiledMethod* QuickCompiler::Compile(CompilerDriver& driver,
-                                      const DexFile::CodeItem* code_item,
-                                      uint32_t access_flags,
-                                      InvokeType invoke_type,
-                                      uint16_t class_def_idx,
-                                      uint32_t method_idx,
-                                      jobject class_loader,
-                                      const DexFile& dex_file) const {
-  CompiledMethod* method = TryCompileWithSeaIR(driver,
-                                               code_item,
+CompiledMethod* QuickCompiler::Compile(const DexFile::CodeItem* code_item,
+                                       uint32_t access_flags,
+                                       InvokeType invoke_type,
+                                       uint16_t class_def_idx,
+                                       uint32_t method_idx,
+                                       jobject class_loader,
+                                       const DexFile& dex_file) const {
+  CompiledMethod* method = TryCompileWithSeaIR(code_item,
                                                access_flags,
                                                invoke_type,
                                                class_def_idx,
                                                method_idx,
                                                class_loader,
                                                dex_file);
-  if (method != nullptr) return method;
+  if (method != nullptr) {
+    return method;
+  }
 
-  return ArtQuickCompileMethod(driver,
+  return ArtQuickCompileMethod(GetCompilerDriver(),
                                code_item,
                                access_flags,
                                invoke_type,
@@ -76,11 +76,10 @@
                                dex_file);
 }
 
-CompiledMethod* QuickCompiler::JniCompile(CompilerDriver& driver,
-                                          uint32_t access_flags,
+CompiledMethod* QuickCompiler::JniCompile(uint32_t access_flags,
                                           uint32_t method_idx,
                                           const DexFile& dex_file) const {
-  return ArtQuickJniCompileMethod(driver, access_flags, method_idx, dex_file);
+  return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file);
 }
 
 uintptr_t QuickCompiler::GetEntryPointOf(mirror::ArtMethod* method) const {
@@ -88,11 +87,12 @@
 }
 
 bool QuickCompiler::WriteElf(art::File* file,
-                            OatWriter* oat_writer,
-                            const std::vector<const art::DexFile*>& dex_files,
-                            const std::string& android_root,
-                            bool is_host, const CompilerDriver& driver) const {
-  return art::ElfWriterQuick::Create(file, oat_writer, dex_files, android_root, is_host, driver);
+                             OatWriter* oat_writer,
+                             const std::vector<const art::DexFile*>& dex_files,
+                             const std::string& android_root,
+                             bool is_host) const {
+  return art::ElfWriterQuick::Create(file, oat_writer, dex_files, android_root, is_host,
+                                     *GetCompilerDriver());
 }
 
 Backend* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const {
@@ -101,6 +101,9 @@
     case kThumb2:
       mir_to_lir = ArmCodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
+    case kArm64:
+      mir_to_lir = Arm64CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
+      break;
     case kMips:
       mir_to_lir = MipsCodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
@@ -108,7 +111,7 @@
       mir_to_lir = X86CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
     case kX86_64:
-      mir_to_lir = X86CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
+      mir_to_lir = X86_64CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
     default:
       LOG(FATAL) << "Unexpected instruction set: " << cu->instruction_set;
@@ -134,22 +137,21 @@
   return nullptr;
 }
 
-CompiledMethod* OptimizingCompiler::Compile(CompilerDriver& driver,
-                                            const DexFile::CodeItem* code_item,
+CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
                                             uint32_t access_flags,
                                             InvokeType invoke_type,
                                             uint16_t class_def_idx,
                                             uint32_t method_idx,
                                             jobject class_loader,
                                             const DexFile& dex_file) const {
-  CompiledMethod* method = TryCompile(
-      driver, code_item, access_flags, invoke_type, class_def_idx, method_idx,
-      class_loader, dex_file);
-  if (method != nullptr) return method;
+  CompiledMethod* method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
+                                      method_idx, class_loader, dex_file);
+  if (method != nullptr) {
+    return method;
+  }
 
-  return QuickCompiler::Compile(
-      driver, code_item, access_flags, invoke_type, class_def_idx, method_idx,
-      class_loader, dex_file);
+  return QuickCompiler::Compile(code_item, access_flags, invoke_type, class_def_idx, method_idx,
+                                class_loader, dex_file);
 }
 
 }  // namespace art
diff --git a/compiler/compilers.h b/compiler/compilers.h
index 255dd23..2c231e1 100644
--- a/compiler/compilers.h
+++ b/compiler/compilers.h
@@ -23,14 +23,13 @@
 
 class QuickCompiler : public Compiler {
  public:
-  QuickCompiler() : Compiler(100) {}
+  explicit QuickCompiler(CompilerDriver* driver) : Compiler(driver, 100) {}
 
-  void Init(CompilerDriver& driver) const OVERRIDE;
+  void Init() const OVERRIDE;
 
-  void UnInit(CompilerDriver& driver) const OVERRIDE;
+  void UnInit() const OVERRIDE;
 
-  CompiledMethod* Compile(CompilerDriver& driver,
-                          const DexFile::CodeItem* code_item,
+  CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                           uint32_t access_flags,
                           InvokeType invoke_type,
                           uint16_t class_def_idx,
@@ -38,8 +37,7 @@
                           jobject class_loader,
                           const DexFile& dex_file) const OVERRIDE;
 
-  CompiledMethod* JniCompile(CompilerDriver& driver,
-                             uint32_t access_flags,
+  CompiledMethod* JniCompile(uint32_t access_flags,
                              uint32_t method_idx,
                              const DexFile& dex_file) const OVERRIDE;
 
@@ -50,7 +48,7 @@
                 OatWriter* oat_writer,
                 const std::vector<const art::DexFile*>& dex_files,
                 const std::string& android_root,
-                bool is_host, const CompilerDriver& driver) const
+                bool is_host) const
     OVERRIDE
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -73,12 +71,11 @@
   DISALLOW_COPY_AND_ASSIGN(QuickCompiler);
 };
 
-class OptimizingCompiler : public QuickCompiler {
+class OptimizingCompiler FINAL : public QuickCompiler {
  public:
-  OptimizingCompiler() { }
+  explicit OptimizingCompiler(CompilerDriver* driver);
 
-  CompiledMethod* Compile(CompilerDriver& driver,
-                          const DexFile::CodeItem* code_item,
+  CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                           uint32_t access_flags,
                           InvokeType invoke_type,
                           uint16_t class_def_idx,
@@ -86,8 +83,7 @@
                           jobject class_loader,
                           const DexFile& dex_file) const OVERRIDE;
 
-  CompiledMethod* TryCompile(CompilerDriver& driver,
-                             const DexFile::CodeItem* code_item,
+  CompiledMethod* TryCompile(const DexFile::CodeItem* code_item,
                              uint32_t access_flags,
                              InvokeType invoke_type,
                              uint16_t class_def_idx,
@@ -96,6 +92,8 @@
                              const DexFile& dex_file) const;
 
  private:
+  std::unique_ptr<std::ostream> visualizer_output_;
+
   DISALLOW_COPY_AND_ASSIGN(OptimizingCompiler);
 };
 
diff --git a/compiler/dex/bb_optimizations.cc b/compiler/dex/bb_optimizations.cc
index abfa7a7..8b5eba0 100644
--- a/compiler/dex/bb_optimizations.cc
+++ b/compiler/dex/bb_optimizations.cc
@@ -23,7 +23,13 @@
 /*
  * Code Layout pass implementation start.
  */
-bool CodeLayout::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+bool CodeLayout::Worker(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+  CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+  DCHECK(cUnit != nullptr);
+  BasicBlock* bb = pass_me_data_holder->bb;
+  DCHECK(bb != nullptr);
   cUnit->mir_graph->LayoutBlocks(bb);
   // No need of repeating, so just return false.
   return false;
@@ -32,23 +38,42 @@
 /*
  * SSATransformation pass implementation start.
  */
-bool SSATransformation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+void SSATransformation::Start(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+  DCHECK(cUnit != nullptr);
+  cUnit->mir_graph->SSATransformationStart();
+}
+
+bool SSATransformation::Worker(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+  CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+  DCHECK(cUnit != nullptr);
+  BasicBlock* bb = pass_me_data_holder->bb;
+  DCHECK(bb != nullptr);
   cUnit->mir_graph->InsertPhiNodeOperands(bb);
   // No need of repeating, so just return false.
   return false;
 }
 
-void SSATransformation::End(CompilationUnit* cUnit) const {
-  // Verify the dataflow information after the pass.
-  if (cUnit->enable_debug & (1 << kDebugVerifyDataflow)) {
-    cUnit->mir_graph->VerifyDataflow();
-  }
+void SSATransformation::End(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+  DCHECK(cUnit != nullptr);
+  cUnit->mir_graph->SSATransformationEnd();
 }
 
 /*
  * ConstantPropagation pass implementation start
  */
-bool ConstantPropagation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+bool ConstantPropagation::Worker(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+  CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+  DCHECK(cUnit != nullptr);
+  BasicBlock* bb = pass_me_data_holder->bb;
+  DCHECK(bb != nullptr);
   cUnit->mir_graph->DoConstantPropagation(bb);
   // No need of repeating, so just return false.
   return false;
@@ -57,7 +82,10 @@
 /*
  * MethodUseCount pass implementation start.
  */
-bool MethodUseCount::Gate(const CompilationUnit* cUnit) const {
+bool MethodUseCount::Gate(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+  DCHECK(cUnit != nullptr);
   // First initialize the data.
   cUnit->mir_graph->InitializeMethodUses();
 
@@ -67,7 +95,13 @@
   return res;
 }
 
-bool MethodUseCount::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+bool MethodUseCount::Worker(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+  CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+  DCHECK(cUnit != nullptr);
+  BasicBlock* bb = pass_me_data_holder->bb;
+  DCHECK(bb != nullptr);
   cUnit->mir_graph->CountUses(bb);
   // No need of repeating, so just return false.
   return false;
@@ -76,7 +110,13 @@
 /*
  * BasicBlock Combine pass implementation start.
  */
-bool BBCombine::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+bool BBCombine::Worker(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+  CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+  DCHECK(cUnit != nullptr);
+  BasicBlock* bb = pass_me_data_holder->bb;
+  DCHECK(bb != nullptr);
   cUnit->mir_graph->CombineBlocks(bb);
 
   // No need of repeating, so just return false.
@@ -86,7 +126,10 @@
 /*
  * BasicBlock Optimization pass implementation start.
  */
-void BBOptimizations::Start(CompilationUnit* cUnit) const {
+void BBOptimizations::Start(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+  DCHECK(cUnit != nullptr);
   /*
   * This pass has a different ordering depending on the suppress exception,
    * so do the pass here for now:
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 6d500a5..3a529f2 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -18,7 +18,7 @@
 #define ART_COMPILER_DEX_BB_OPTIMIZATIONS_H_
 
 #include "compiler_internals.h"
-#include "pass.h"
+#include "pass_me.h"
 
 namespace art {
 
@@ -26,16 +26,22 @@
  * @class CacheFieldLoweringInfo
  * @brief Cache the lowering info for fields used by IGET/IPUT/SGET/SPUT insns.
  */
-class CacheFieldLoweringInfo : public Pass {
+class CacheFieldLoweringInfo : public PassME {
  public:
-  CacheFieldLoweringInfo() : Pass("CacheFieldLoweringInfo", kNoNodes) {
+  CacheFieldLoweringInfo() : PassME("CacheFieldLoweringInfo", kNoNodes) {
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->DoCacheFieldLoweringInfo();
   }
 
-  bool Gate(const CompilationUnit *cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return cUnit->mir_graph->HasFieldAccess();
   }
 };
@@ -44,16 +50,22 @@
  * @class CacheMethodLoweringInfo
  * @brief Cache the lowering info for methods called by INVOKEs.
  */
-class CacheMethodLoweringInfo : public Pass {
+class CacheMethodLoweringInfo : public PassME {
  public:
-  CacheMethodLoweringInfo() : Pass("CacheMethodLoweringInfo", kNoNodes) {
+  CacheMethodLoweringInfo() : PassME("CacheMethodLoweringInfo", kNoNodes) {
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->DoCacheMethodLoweringInfo();
   }
 
-  bool Gate(const CompilationUnit *cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return cUnit->mir_graph->HasInvokes();
   }
 };
@@ -62,26 +74,41 @@
  * @class CallInlining
  * @brief Perform method inlining pass.
  */
-class CallInlining : public Pass {
+class CallInlining : public PassME {
  public:
-  CallInlining() : Pass("CallInlining") {
+  CallInlining() : PassME("CallInlining") {
   }
 
-  bool Gate(const CompilationUnit* cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return cUnit->mir_graph->InlineCallsGate();
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->InlineCallsStart();
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+  bool Worker(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+    CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+    DCHECK(cUnit != nullptr);
+    BasicBlock* bb = pass_me_data_holder->bb;
+    DCHECK(bb != nullptr);
     cUnit->mir_graph->InlineCalls(bb);
     // No need of repeating, so just return false.
     return false;
   }
 
-  void End(CompilationUnit* cUnit) const {
+  void End(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->InlineCallsEnd();
   }
 };
@@ -90,48 +117,52 @@
  * @class CodeLayout
  * @brief Perform the code layout pass.
  */
-class CodeLayout : public Pass {
+class CodeLayout : public PassME {
  public:
-  CodeLayout() : Pass("CodeLayout", "2_post_layout_cfg") {
+  CodeLayout() : PassME("CodeLayout", "2_post_layout_cfg") {
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->VerifyDataflow();
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
+  bool Worker(const PassDataHolder* data) const;
 };
 
 /**
  * @class SSATransformation
  * @brief Perform an SSA representation pass on the CompilationUnit.
  */
-class SSATransformation : public Pass {
+class SSATransformation : public PassME {
  public:
-  SSATransformation() : Pass("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") {
+  SSATransformation() : PassME("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
+  bool Worker(const PassDataHolder* data) const;
 
-  void Start(CompilationUnit* cUnit) const {
-    cUnit->mir_graph->InitializeSSATransformation();
-  }
+  void Start(const PassDataHolder* data) const;
 
-  void End(CompilationUnit* cUnit) const;
+  void End(const PassDataHolder* data) const;
 };
 
 /**
  * @class ConstantPropagation
  * @brief Perform a constant propagation pass.
  */
-class ConstantPropagation : public Pass {
+class ConstantPropagation : public PassME {
  public:
-  ConstantPropagation() : Pass("ConstantPropagation") {
+  ConstantPropagation() : PassME("ConstantPropagation") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
+  bool Worker(const PassDataHolder* data) const;
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->InitializeConstantPropagation();
   }
 };
@@ -140,12 +171,15 @@
  * @class InitRegLocations
  * @brief Initialize Register Locations.
  */
-class InitRegLocations : public Pass {
+class InitRegLocations : public PassME {
  public:
-  InitRegLocations() : Pass("InitRegLocation", kNoNodes) {
+  InitRegLocations() : PassME("InitRegLocation", kNoNodes) {
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->InitRegLocations();
   }
 };
@@ -154,53 +188,77 @@
  * @class MethodUseCount
  * @brief Count the register uses of the method
  */
-class MethodUseCount : public Pass {
+class MethodUseCount : public PassME {
  public:
-  MethodUseCount() : Pass("UseCount") {
+  MethodUseCount() : PassME("UseCount") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
+  bool Worker(const PassDataHolder* data) const;
 
-  bool Gate(const CompilationUnit* cUnit) const;
+  bool Gate(const PassDataHolder* data) const;
 };
 
 /**
  * @class NullCheckEliminationAndTypeInference
  * @brief Null check elimination and type inference.
  */
-class NullCheckEliminationAndTypeInference : public Pass {
+class NullCheckEliminationAndTypeInference : public PassME {
  public:
   NullCheckEliminationAndTypeInference()
-    : Pass("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") {
+    : PassME("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") {
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->EliminateNullChecksAndInferTypesStart();
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+  bool Worker(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+    CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+    DCHECK(cUnit != nullptr);
+    BasicBlock* bb = pass_me_data_holder->bb;
+    DCHECK(bb != nullptr);
     return cUnit->mir_graph->EliminateNullChecksAndInferTypes(bb);
   }
 
-  void End(CompilationUnit* cUnit) const {
+  void End(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->EliminateNullChecksAndInferTypesEnd();
   }
 };
 
-class ClassInitCheckElimination : public Pass {
+class ClassInitCheckElimination : public PassME {
  public:
-  ClassInitCheckElimination() : Pass("ClInitCheckElimination", kRepeatingPreOrderDFSTraversal) {
+  ClassInitCheckElimination() : PassME("ClInitCheckElimination", kRepeatingPreOrderDFSTraversal) {
   }
 
-  bool Gate(const CompilationUnit* cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return cUnit->mir_graph->EliminateClassInitChecksGate();
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+  bool Worker(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+    CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+    DCHECK(cUnit != nullptr);
+    BasicBlock* bb = pass_me_data_holder->bb;
+    DCHECK(bb != nullptr);
     return cUnit->mir_graph->EliminateClassInitChecks(bb);
   }
 
-  void End(CompilationUnit* cUnit) const {
+  void End(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->EliminateClassInitChecksEnd();
   }
 };
@@ -209,32 +267,38 @@
  * @class BBCombine
  * @brief Basic block combine pass.
  */
-class BBCombine : public Pass {
+class BBCombine : public PassME {
  public:
-  BBCombine() : Pass("BBCombine", kPreOrderDFSTraversal, "5_post_bbcombine_cfg") {
+  BBCombine() : PassME("BBCombine", kPreOrderDFSTraversal, "5_post_bbcombine_cfg") {
   }
 
-  bool Gate(const CompilationUnit* cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return ((cUnit->disable_opt & (1 << kSuppressExceptionEdges)) != 0);
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
+  bool Worker(const PassDataHolder* data) const;
 };
 
 /**
  * @class BasicBlock Optimizations
  * @brief Any simple BasicBlock optimization can be put here.
  */
-class BBOptimizations : public Pass {
+class BBOptimizations : public PassME {
  public:
-  BBOptimizations() : Pass("BBOptimizations", kNoNodes, "5_post_bbo_cfg") {
+  BBOptimizations() : PassME("BBOptimizations", kNoNodes, "5_post_bbo_cfg") {
   }
 
-  bool Gate(const CompilationUnit* cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return ((cUnit->disable_opt & (1 << kBBOpt)) == 0);
   }
 
-  void Start(CompilationUnit* cUnit) const;
+  void Start(const PassDataHolder* data) const;
 };
 
 }  // namespace art
diff --git a/compiler/dex/bit_vector_block_iterator.h b/compiler/dex/bit_vector_block_iterator.h
deleted file mode 100644
index 0f1c2b6..0000000
--- a/compiler/dex/bit_vector_block_iterator.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_BIT_VECTOR_BLOCK_ITERATOR_H_
-#define ART_COMPILER_DEX_BIT_VECTOR_BLOCK_ITERATOR_H_
-
-#include "base/bit_vector.h"
-#include "compiler_enums.h"
-#include "utils/arena_bit_vector.h"
-#include "utils/arena_allocator.h"
-#include "compiler_ir.h"
-
-namespace art {
-
-class MIRGraph;
-
-/**
- * @class BasicBlockIterator
- * @brief Helper class to get the BasicBlocks when iterating through the ArenaBitVector.
- */
-class BitVectorBlockIterator {
-  public:
-    explicit BitVectorBlockIterator(BitVector* bv, MIRGraph* mir_graph)
-      : mir_graph_(mir_graph),
-        internal_iterator_(bv) {}
-
-    explicit BitVectorBlockIterator(BitVector* bv, CompilationUnit* c_unit)
-      : mir_graph_(c_unit->mir_graph.get()),
-        internal_iterator_(bv) {}
-
-    BasicBlock* Next();
-
-    void* operator new(size_t size, ArenaAllocator* arena) {
-      return arena->Alloc(size, kArenaAllocGrowableArray);
-    };
-    void operator delete(void* p) {}  // Nop.
-
-  private:
-    MIRGraph* const mir_graph_;
-    BitVector::Iterator internal_iterator_;
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_BIT_VECTOR_BLOCK_ITERATOR_H_
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index ba4b5c3..5b4492f 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -27,6 +27,15 @@
   kAnyReg,
 };
 
+enum BitsUsed {
+  kSize32Bits,
+  kSize64Bits,
+  kSize128Bits,
+  kSize256Bits,
+  kSize512Bits,
+  kSize1024Bits,
+};
+
 enum SpecialTargetRegister {
   kSelf,            // Thread pointer.
   kSuspend,         // Used to reduce suspend checks for some targets.
@@ -56,17 +65,6 @@
   kLocInvalid
 };
 
-/**
- * Support for vector registers.  Initially used for x86 floats.  This will be used
- * to replace the assumption that a double takes up 2 single FP registers
- */
-enum VectorLengthType {
-  kVectorNotUsed = 0,   // This value is NOT in a vector register.
-  kVectorLength4,       // The value occupies 4 bytes in a vector register.
-  kVectorLength8,       // The value occupies 8 bytes in a vector register.
-  kVectorLength16       // The value occupies 16 bytes in a vector register (unused now).
-};
-
 enum BBType {
   kNullBlock,
   kEntryBlock,
@@ -128,10 +126,108 @@
   kMirOpCheck,
   kMirOpCheckPart2,
   kMirOpSelect,
+
+  // Vector opcodes:
+  // TypeSize is an encoded field giving the element type and the vector size.
+  // It is encoded as OpSize << 16 | (number of bits in vector)
+  //
+  // Destination and source are integers that will be interpreted by the
+  // backend that supports Vector operations.  Backends are permitted to support only
+  // certain vector register sizes.
+  //
+  // At this point, only two operand instructions are supported.  Three operand instructions
+  // could be supported by using a bit in TypeSize and arg[0] where needed.
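+  //
+  // For example (illustrative only): under this encoding, a 128-bit vector whose elements use an
+  // OpSize value OS would carry TypeSize = (OS << 16) | 128, so the low 16 bits give the vector
+  // width in bits and the upper bits give the element OpSize.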
+
+  // @brief MIR to move constant data to a vector register
+  // vA: number of bits in register
+  // vB: destination
+  // args[0]~args[3]: up to 128 bits of data for initialization
+  kMirOpConstVector,
+
+  // @brief MIR to move a vectorized register to another
+  // vA: TypeSize
+  // vB: destination
+  // vC: source
+  kMirOpMoveVector,
+
+  // @brief Packed multiply of units in two vector registers: vB = vB .* vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedMultiply,
+
+  // @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedAddition,
+
+  // @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedSubtract,
+
+  // @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: immediate
+  kMirOpPackedShiftLeft,
+
+  // @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: immediate
+  kMirOpPackedSignedShiftRight,
+
+  // @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: immediate
+  kMirOpPackedUnsignedShiftRight,
+
+  // @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedAnd,
+
+  // @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedOr,
+
+  // @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedXor,
+
+  // @brief Reduce a 128-bit packed element into a single VR by taking lower bits
+  // @details Instruction does a horizontal addition of the packed elements and then adds it to VR
+  // vA: TypeSize
+  // vB: destination and source VR (not vector register)
+  // vC: source (vector register)
+  kMirOpPackedAddReduce,
+
+  // @brief Extract a packed element into a single VR.
+  // vA: TypeSize
+  // vB: destination VR (not vector register)
+  // vC: source (vector register)
+  // arg[0]: The index to use for extraction from vector register (which packed element)
+  kMirOpPackedReduce,
+
+  // @brief Create a vector value, with all TypeSize values equal to vC
+  // vA: TypeSize
+  // vB: destination vector register
+  // vC: source VR (not vector register)
+  kMirOpPackedSet,
+
   kMirOpLast,
 };
 
-enum MIROptimizationFlagPositons {
+enum MIROptimizationFlagPositions {
   kMIRIgnoreNullCheck = 0,
   kMIRNullCheckOnly,
   kMIRIgnoreRangeCheck,
@@ -143,6 +239,7 @@
   kMIRIgnoreSuspendCheck,
   kMIRDup,
   kMIRMark,                           // Temporary node mark.
+  kMIRLastMIRFlag,
 };
 
 // For successor_block_list.
diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h
index 70159ca..35d777e 100644
--- a/compiler/dex/compiler_ir.h
+++ b/compiler/dex/compiler_ir.h
@@ -85,8 +85,8 @@
   ArenaAllocator arena;
   ArenaStack arena_stack;  // Arenas for ScopedArenaAllocator.
 
-  UniquePtr<MIRGraph> mir_graph;   // MIR container.
-  UniquePtr<Backend> cg;           // Target-specific codegen.
+  std::unique_ptr<MIRGraph> mir_graph;   // MIR container.
+  std::unique_ptr<Backend> cg;           // Target-specific codegen.
   TimingLogger timings;
 };
 
diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h
index b45d6a4..62973af 100644
--- a/compiler/dex/dataflow_iterator.h
+++ b/compiler/dex/dataflow_iterator.h
@@ -326,6 +326,81 @@
       GrowableArray<BasicBlock*>::Iterator all_nodes_iterator_;    /**< @brief The list of all the nodes */
   };
 
+  /**
+   * @class TopologicalSortIterator
+   * @brief Used to perform a Topological Sort Iteration of a MIRGraph.
+   */
+  class TopologicalSortIterator : public DataflowIterator {
+    public:
+      /**
+       * @brief The constructor, using all of the reachable blocks of the MIRGraph.
+       * @param mir_graph The MIRGraph considered.
+       */
+      explicit TopologicalSortIterator(MIRGraph* mir_graph)
+          : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder() != nullptr ?
+            mir_graph->GetTopologicalSortOrder()->Size() : 0) {
+        // Extra setup for TopologicalSortIterator.
+        idx_ = start_idx_;
+        block_id_list_ = mir_graph->GetTopologicalSortOrder();
+
+        if (mir_graph->GetTopologicalSortOrder() == nullptr) {
+          /* Compute the topological order */
+          mir_graph->ComputeTopologicalSortOrder();
+        }
+      }
+
+      /**
+       * @brief Get the next BasicBlock depending on iteration order.
+       * @param had_change whether the user of the iteration changed the previous BasicBlock.
+       * @return the next BasicBlock following the iteration order, 0 if finished.
+       */
+      virtual BasicBlock* Next(bool had_change = false) {
+        // Update changed: if had_change is true, we remember it for the whole iteration.
+        changed_ |= had_change;
+
+        return ForwardSingleNext();
+      }
+  };
+
+  /**
+   * @class RepeatingTopologicalSortIterator
+   * @brief Used to perform a Topological Sort Iteration of a MIRGraph.
+   * @details If there is a change during an iteration, the whole iteration is repeated once the
+   *          current traversal of the blocks completes.
+   */
+  class RepeatingTopologicalSortIterator : public DataflowIterator {
+    public:
+     /**
+      * @brief The constructor, using all of the reachable blocks of the MIRGraph.
+      * @param mir_graph The MIRGraph considered.
+      */
+     explicit RepeatingTopologicalSortIterator(MIRGraph* mir_graph)
+         : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder() != nullptr ?
+           mir_graph->GetTopologicalSortOrder()->Size() : 0) {
+       // Extra setup for RepeatingTopologicalSortIterator.
+       idx_ = start_idx_;
+       block_id_list_ = mir_graph->GetTopologicalSortOrder();
+
+       if (mir_graph->GetTopologicalSortOrder() == nullptr) {
+         /* Compute the topological order */
+         mir_graph->ComputeTopologicalSortOrder();
+       }
+     }
+
+     /**
+      * @brief Get the next BasicBlock depending on iteration order.
+      * @param had_change whether the user of the iteration changed the previous BasicBlock.
+      * @return the next BasicBlock following the iteration order, 0 if finished.
+      */
+     virtual BasicBlock* Next(bool had_change = false) {
+       // Update changed: if had_change is true, we remember it for the whole iteration.
+       changed_ |= had_change;
+
+       return ForwardRepeatNext();
+     }
+  };
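+
+  // Illustrative usage sketch (DoSomethingWith is a hypothetical per-block worker): a repeating
+  // iterator is typically driven in a loop that feeds the per-block result back into Next(), e.g.
+  //   RepeatingTopologicalSortIterator iter(mir_graph);
+  //   bool change = false;
+  //   for (BasicBlock* bb = iter.Next(change); bb != nullptr; bb = iter.Next(change)) {
+  //     change = DoSomethingWith(bb);
+  //   }
+  // so that the traversal is repeated whenever a change was reported during the previous pass.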
+
+
 }  // namespace art
 
 #endif  // ART_COMPILER_DEX_DATAFLOW_ITERATOR_H_
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index ed2ecac..9bad736 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -21,7 +21,7 @@
 #include "dataflow_iterator-inl.h"
 #include "leb128.h"
 #include "mirror/object.h"
-#include "pass_driver.h"
+#include "pass_driver_me.h"
 #include "runtime.h"
 #include "base/logging.h"
 #include "base/timing_logger.h"
@@ -30,17 +30,17 @@
 
 namespace art {
 
-extern "C" void ArtInitQuickCompilerContext(art::CompilerDriver& driver) {
-  CHECK(driver.GetCompilerContext() == NULL);
+extern "C" void ArtInitQuickCompilerContext(art::CompilerDriver* driver) {
+  CHECK(driver->GetCompilerContext() == nullptr);
 }
 
-extern "C" void ArtUnInitQuickCompilerContext(art::CompilerDriver& driver) {
-  CHECK(driver.GetCompilerContext() == NULL);
+extern "C" void ArtUnInitQuickCompilerContext(art::CompilerDriver* driver) {
+  CHECK(driver->GetCompilerContext() == nullptr);
 }
 
 /* Default optimizer/debug setting for the compiler. */
 static uint32_t kCompilerOptimizerDisableFlags = 0 |  // Disable specific optimizations
-  (1 << kLoadStoreElimination) |
+  (1 << kLoadStoreElimination) |  // TODO: this pass has been broken for a while - fix or delete.
   // (1 << kLoadHoisting) |
   // (1 << kSuppressLoads) |
   // (1 << kNullCheckElimination) |
@@ -75,34 +75,36 @@
   // (1 << kDebugShowSummaryMemoryUsage) |
   // (1 << kDebugShowFilterStats) |
   // (1 << kDebugTimings) |
+  // (1 << kDebugCodegenDump) |
   0;
 
 CompilationUnit::CompilationUnit(ArenaPool* pool)
-  : compiler_driver(NULL),
-    class_linker(NULL),
-    dex_file(NULL),
-    class_loader(NULL),
+  : compiler_driver(nullptr),
+    class_linker(nullptr),
+    dex_file(nullptr),
+    class_loader(nullptr),
     class_def_idx(0),
     method_idx(0),
-    code_item(NULL),
+    code_item(nullptr),
     access_flags(0),
     invoke_type(kDirect),
-    shorty(NULL),
+    shorty(nullptr),
     disable_opt(0),
     enable_debug(0),
     verbose(false),
-    compiler(NULL),
+    compiler(nullptr),
     instruction_set(kNone),
+    target64(false),
     num_dalvik_registers(0),
-    insns(NULL),
+    insns(nullptr),
     num_ins(0),
     num_outs(0),
     num_regs(0),
     compiler_flip_match(false),
     arena(pool),
     arena_stack(pool),
-    mir_graph(NULL),
-    cg(NULL),
+    mir_graph(nullptr),
+    cg(nullptr),
     timings("QuickCompiler", true, false) {
 }
 
@@ -131,6 +133,664 @@
   }
 }
 
+// TODO: Remove this when we are able to compile everything.
+int arm64_support_list[] = {
+    Instruction::NOP,
+    Instruction::MOVE,
+    Instruction::MOVE_FROM16,
+    Instruction::MOVE_16,
+    Instruction::MOVE_WIDE,
+    Instruction::MOVE_WIDE_FROM16,
+    Instruction::MOVE_WIDE_16,
+    Instruction::MOVE_OBJECT,
+    Instruction::MOVE_OBJECT_FROM16,
+    Instruction::MOVE_OBJECT_16,
+    // Instruction::MOVE_RESULT,
+    // Instruction::MOVE_RESULT_WIDE,
+    // Instruction::MOVE_RESULT_OBJECT,
+    Instruction::MOVE_EXCEPTION,
+    Instruction::RETURN_VOID,
+    Instruction::RETURN,
+    Instruction::RETURN_WIDE,
+    // Instruction::RETURN_OBJECT,
+    // Instruction::CONST_4,
+    // Instruction::CONST_16,
+    // Instruction::CONST,
+    // Instruction::CONST_HIGH16,
+    // Instruction::CONST_WIDE_16,
+    // Instruction::CONST_WIDE_32,
+    // Instruction::CONST_WIDE,
+    // Instruction::CONST_WIDE_HIGH16,
+    // Instruction::CONST_STRING,
+    // Instruction::CONST_STRING_JUMBO,
+    // Instruction::CONST_CLASS,
+    Instruction::MONITOR_ENTER,
+    Instruction::MONITOR_EXIT,
+    // Instruction::CHECK_CAST,
+    // Instruction::INSTANCE_OF,
+    // Instruction::ARRAY_LENGTH,
+    // Instruction::NEW_INSTANCE,
+    // Instruction::NEW_ARRAY,
+    // Instruction::FILLED_NEW_ARRAY,
+    // Instruction::FILLED_NEW_ARRAY_RANGE,
+    // Instruction::FILL_ARRAY_DATA,
+    Instruction::THROW,
+    // Instruction::GOTO,
+    // Instruction::GOTO_16,
+    // Instruction::GOTO_32,
+    // Instruction::PACKED_SWITCH,
+    // Instruction::SPARSE_SWITCH,
+    // Instruction::CMPL_FLOAT,
+    // Instruction::CMPG_FLOAT,
+    // Instruction::CMPL_DOUBLE,
+    // Instruction::CMPG_DOUBLE,
+    Instruction::CMP_LONG,
+    // Instruction::IF_EQ,
+    // Instruction::IF_NE,
+    // Instruction::IF_LT,
+    // Instruction::IF_GE,
+    // Instruction::IF_GT,
+    // Instruction::IF_LE,
+    // Instruction::IF_EQZ,
+    // Instruction::IF_NEZ,
+    // Instruction::IF_LTZ,
+    // Instruction::IF_GEZ,
+    // Instruction::IF_GTZ,
+    // Instruction::IF_LEZ,
+    // Instruction::UNUSED_3E,
+    // Instruction::UNUSED_3F,
+    // Instruction::UNUSED_40,
+    // Instruction::UNUSED_41,
+    // Instruction::UNUSED_42,
+    // Instruction::UNUSED_43,
+    // Instruction::AGET,
+    // Instruction::AGET_WIDE,
+    // Instruction::AGET_OBJECT,
+    // Instruction::AGET_BOOLEAN,
+    // Instruction::AGET_BYTE,
+    // Instruction::AGET_CHAR,
+    // Instruction::AGET_SHORT,
+    // Instruction::APUT,
+    // Instruction::APUT_WIDE,
+    // Instruction::APUT_OBJECT,
+    // Instruction::APUT_BOOLEAN,
+    // Instruction::APUT_BYTE,
+    // Instruction::APUT_CHAR,
+    // Instruction::APUT_SHORT,
+    // Instruction::IGET,
+    // Instruction::IGET_WIDE,
+    // Instruction::IGET_OBJECT,
+    // Instruction::IGET_BOOLEAN,
+    // Instruction::IGET_BYTE,
+    // Instruction::IGET_CHAR,
+    // Instruction::IGET_SHORT,
+    // Instruction::IPUT,
+    // Instruction::IPUT_WIDE,
+    // Instruction::IPUT_OBJECT,
+    // Instruction::IPUT_BOOLEAN,
+    // Instruction::IPUT_BYTE,
+    // Instruction::IPUT_CHAR,
+    // Instruction::IPUT_SHORT,
+    Instruction::SGET,
+    // Instruction::SGET_WIDE,
+    Instruction::SGET_OBJECT,
+    // Instruction::SGET_BOOLEAN,
+    // Instruction::SGET_BYTE,
+    // Instruction::SGET_CHAR,
+    // Instruction::SGET_SHORT,
+    Instruction::SPUT,
+    // Instruction::SPUT_WIDE,
+    // Instruction::SPUT_OBJECT,
+    // Instruction::SPUT_BOOLEAN,
+    // Instruction::SPUT_BYTE,
+    // Instruction::SPUT_CHAR,
+    // Instruction::SPUT_SHORT,
+    Instruction::INVOKE_VIRTUAL,
+    Instruction::INVOKE_SUPER,
+    Instruction::INVOKE_DIRECT,
+    Instruction::INVOKE_STATIC,
+    Instruction::INVOKE_INTERFACE,
+    // Instruction::RETURN_VOID_BARRIER,
+    // Instruction::INVOKE_VIRTUAL_RANGE,
+    // Instruction::INVOKE_SUPER_RANGE,
+    // Instruction::INVOKE_DIRECT_RANGE,
+    // Instruction::INVOKE_STATIC_RANGE,
+    // Instruction::INVOKE_INTERFACE_RANGE,
+    // Instruction::UNUSED_79,
+    // Instruction::UNUSED_7A,
+    Instruction::NEG_INT,
+    Instruction::NOT_INT,
+    Instruction::NEG_LONG,
+    Instruction::NOT_LONG,
+    // Instruction::NEG_FLOAT,
+    // Instruction::NEG_DOUBLE,
+    Instruction::INT_TO_LONG,
+    // Instruction::INT_TO_FLOAT,
+    // Instruction::INT_TO_DOUBLE,
+    Instruction::LONG_TO_INT,
+    // Instruction::LONG_TO_FLOAT,
+    // Instruction::LONG_TO_DOUBLE,
+    // Instruction::FLOAT_TO_INT,
+    // Instruction::FLOAT_TO_LONG,
+    // Instruction::FLOAT_TO_DOUBLE,
+    // Instruction::DOUBLE_TO_INT,
+    // Instruction::DOUBLE_TO_LONG,
+    // Instruction::DOUBLE_TO_FLOAT,
+    Instruction::INT_TO_BYTE,
+    Instruction::INT_TO_CHAR,
+    Instruction::INT_TO_SHORT,
+    Instruction::ADD_INT,
+    Instruction::SUB_INT,
+    Instruction::MUL_INT,
+    Instruction::DIV_INT,
+    Instruction::REM_INT,
+    Instruction::AND_INT,
+    Instruction::OR_INT,
+    Instruction::XOR_INT,
+    Instruction::SHL_INT,
+    Instruction::SHR_INT,
+    Instruction::USHR_INT,
+    Instruction::ADD_LONG,
+    Instruction::SUB_LONG,
+    Instruction::MUL_LONG,
+    Instruction::DIV_LONG,
+    Instruction::REM_LONG,
+    Instruction::AND_LONG,
+    Instruction::OR_LONG,
+    Instruction::XOR_LONG,
+    Instruction::SHL_LONG,
+    Instruction::SHR_LONG,
+    Instruction::USHR_LONG,
+    // Instruction::ADD_FLOAT,
+    // Instruction::SUB_FLOAT,
+    // Instruction::MUL_FLOAT,
+    // Instruction::DIV_FLOAT,
+    // Instruction::REM_FLOAT,
+    // Instruction::ADD_DOUBLE,
+    // Instruction::SUB_DOUBLE,
+    // Instruction::MUL_DOUBLE,
+    // Instruction::DIV_DOUBLE,
+    // Instruction::REM_DOUBLE,
+    Instruction::ADD_INT_2ADDR,
+    Instruction::SUB_INT_2ADDR,
+    Instruction::MUL_INT_2ADDR,
+    Instruction::DIV_INT_2ADDR,
+    Instruction::REM_INT_2ADDR,
+    Instruction::AND_INT_2ADDR,
+    Instruction::OR_INT_2ADDR,
+    Instruction::XOR_INT_2ADDR,
+    Instruction::SHL_INT_2ADDR,
+    Instruction::SHR_INT_2ADDR,
+    Instruction::USHR_INT_2ADDR,
+    Instruction::ADD_LONG_2ADDR,
+    Instruction::SUB_LONG_2ADDR,
+    Instruction::MUL_LONG_2ADDR,
+    Instruction::DIV_LONG_2ADDR,
+    Instruction::REM_LONG_2ADDR,
+    Instruction::AND_LONG_2ADDR,
+    Instruction::OR_LONG_2ADDR,
+    Instruction::XOR_LONG_2ADDR,
+    Instruction::SHL_LONG_2ADDR,
+    Instruction::SHR_LONG_2ADDR,
+    Instruction::USHR_LONG_2ADDR,
+    // Instruction::ADD_FLOAT_2ADDR,
+    // Instruction::SUB_FLOAT_2ADDR,
+    // Instruction::MUL_FLOAT_2ADDR,
+    // Instruction::DIV_FLOAT_2ADDR,
+    // Instruction::REM_FLOAT_2ADDR,
+    // Instruction::ADD_DOUBLE_2ADDR,
+    // Instruction::SUB_DOUBLE_2ADDR,
+    // Instruction::MUL_DOUBLE_2ADDR,
+    // Instruction::DIV_DOUBLE_2ADDR,
+    // Instruction::REM_DOUBLE_2ADDR,
+    Instruction::ADD_INT_LIT16,
+    Instruction::RSUB_INT,
+    Instruction::MUL_INT_LIT16,
+    Instruction::DIV_INT_LIT16,
+    Instruction::REM_INT_LIT16,
+    Instruction::AND_INT_LIT16,
+    Instruction::OR_INT_LIT16,
+    Instruction::XOR_INT_LIT16,
+    Instruction::ADD_INT_LIT8,
+    Instruction::RSUB_INT_LIT8,
+    Instruction::MUL_INT_LIT8,
+    Instruction::DIV_INT_LIT8,
+    Instruction::REM_INT_LIT8,
+    Instruction::AND_INT_LIT8,
+    Instruction::OR_INT_LIT8,
+    Instruction::XOR_INT_LIT8,
+    Instruction::SHL_INT_LIT8,
+    Instruction::SHR_INT_LIT8,
+    Instruction::USHR_INT_LIT8,
+    // Instruction::IGET_QUICK,
+    // Instruction::IGET_WIDE_QUICK,
+    // Instruction::IGET_OBJECT_QUICK,
+    // Instruction::IPUT_QUICK,
+    // Instruction::IPUT_WIDE_QUICK,
+    // Instruction::IPUT_OBJECT_QUICK,
+    // Instruction::INVOKE_VIRTUAL_QUICK,
+    // Instruction::INVOKE_VIRTUAL_RANGE_QUICK,
+    // Instruction::UNUSED_EB,
+    // Instruction::UNUSED_EC,
+    // Instruction::UNUSED_ED,
+    // Instruction::UNUSED_EE,
+    // Instruction::UNUSED_EF,
+    // Instruction::UNUSED_F0,
+    // Instruction::UNUSED_F1,
+    // Instruction::UNUSED_F2,
+    // Instruction::UNUSED_F3,
+    // Instruction::UNUSED_F4,
+    // Instruction::UNUSED_F5,
+    // Instruction::UNUSED_F6,
+    // Instruction::UNUSED_F7,
+    // Instruction::UNUSED_F8,
+    // Instruction::UNUSED_F9,
+    // Instruction::UNUSED_FA,
+    // Instruction::UNUSED_FB,
+    // Instruction::UNUSED_FC,
+    // Instruction::UNUSED_FD,
+    // Instruction::UNUSED_FE,
+    // Instruction::UNUSED_FF,
+
+    // ----- ExtendedMIROpcode -----
+    // kMirOpPhi,
+    // kMirOpCopy,
+    // kMirOpFusedCmplFloat,
+    // kMirOpFusedCmpgFloat,
+    // kMirOpFusedCmplDouble,
+    // kMirOpFusedCmpgDouble,
+    // kMirOpFusedCmpLong,
+    // kMirOpNop,
+    // kMirOpNullCheck,
+    // kMirOpRangeCheck,
+    kMirOpDivZeroCheck,
+    kMirOpCheck,
+    // kMirOpCheckPart2,
+    // kMirOpSelect,
+    // kMirOpLast,
+};
+
+// TODO: Remove this when we are able to compile everything.
+int x86_64_support_list[] = {
+    Instruction::NOP,
+    // Instruction::MOVE,
+    // Instruction::MOVE_FROM16,
+    // Instruction::MOVE_16,
+    // Instruction::MOVE_WIDE,
+    // Instruction::MOVE_WIDE_FROM16,
+    // Instruction::MOVE_WIDE_16,
+    // Instruction::MOVE_OBJECT,
+    // Instruction::MOVE_OBJECT_FROM16,
+    // Instruction::MOVE_OBJECT_16,
+    // Instruction::MOVE_RESULT,
+    // Instruction::MOVE_RESULT_WIDE,
+    // Instruction::MOVE_RESULT_OBJECT,
+    // Instruction::MOVE_EXCEPTION,
+    Instruction::RETURN_VOID,
+    Instruction::RETURN,
+    // Instruction::RETURN_WIDE,
+    Instruction::RETURN_OBJECT,
+    // Instruction::CONST_4,
+    // Instruction::CONST_16,
+    // Instruction::CONST,
+    // Instruction::CONST_HIGH16,
+    // Instruction::CONST_WIDE_16,
+    // Instruction::CONST_WIDE_32,
+    // Instruction::CONST_WIDE,
+    // Instruction::CONST_WIDE_HIGH16,
+    // Instruction::CONST_STRING,
+    // Instruction::CONST_STRING_JUMBO,
+    // Instruction::CONST_CLASS,
+    // Instruction::MONITOR_ENTER,
+    // Instruction::MONITOR_EXIT,
+    // Instruction::CHECK_CAST,
+    // Instruction::INSTANCE_OF,
+    // Instruction::ARRAY_LENGTH,
+    // Instruction::NEW_INSTANCE,
+    // Instruction::NEW_ARRAY,
+    // Instruction::FILLED_NEW_ARRAY,
+    // Instruction::FILLED_NEW_ARRAY_RANGE,
+    // Instruction::FILL_ARRAY_DATA,
+    // Instruction::THROW,
+    // Instruction::GOTO,
+    // Instruction::GOTO_16,
+    // Instruction::GOTO_32,
+    // Instruction::PACKED_SWITCH,
+    // Instruction::SPARSE_SWITCH,
+    // Instruction::CMPL_FLOAT,
+    // Instruction::CMPG_FLOAT,
+    // Instruction::CMPL_DOUBLE,
+    // Instruction::CMPG_DOUBLE,
+    // Instruction::CMP_LONG,
+    // Instruction::IF_EQ,
+    // Instruction::IF_NE,
+    // Instruction::IF_LT,
+    // Instruction::IF_GE,
+    // Instruction::IF_GT,
+    // Instruction::IF_LE,
+    // Instruction::IF_EQZ,
+    // Instruction::IF_NEZ,
+    // Instruction::IF_LTZ,
+    // Instruction::IF_GEZ,
+    // Instruction::IF_GTZ,
+    // Instruction::IF_LEZ,
+    // Instruction::UNUSED_3E,
+    // Instruction::UNUSED_3F,
+    // Instruction::UNUSED_40,
+    // Instruction::UNUSED_41,
+    // Instruction::UNUSED_42,
+    // Instruction::UNUSED_43,
+    // Instruction::AGET,
+    // Instruction::AGET_WIDE,
+    // Instruction::AGET_OBJECT,
+    // Instruction::AGET_BOOLEAN,
+    // Instruction::AGET_BYTE,
+    // Instruction::AGET_CHAR,
+    // Instruction::AGET_SHORT,
+    // Instruction::APUT,
+    // Instruction::APUT_WIDE,
+    // Instruction::APUT_OBJECT,
+    // Instruction::APUT_BOOLEAN,
+    // Instruction::APUT_BYTE,
+    // Instruction::APUT_CHAR,
+    // Instruction::APUT_SHORT,
+    // Instruction::IGET,
+    // Instruction::IGET_WIDE,
+    // Instruction::IGET_OBJECT,
+    // Instruction::IGET_BOOLEAN,
+    // Instruction::IGET_BYTE,
+    // Instruction::IGET_CHAR,
+    // Instruction::IGET_SHORT,
+    // Instruction::IPUT,
+    // Instruction::IPUT_WIDE,
+    // Instruction::IPUT_OBJECT,
+    // Instruction::IPUT_BOOLEAN,
+    // Instruction::IPUT_BYTE,
+    // Instruction::IPUT_CHAR,
+    // Instruction::IPUT_SHORT,
+    Instruction::SGET,
+    // Instruction::SGET_WIDE,
+    Instruction::SGET_OBJECT,
+    Instruction::SGET_BOOLEAN,
+    Instruction::SGET_BYTE,
+    Instruction::SGET_CHAR,
+    Instruction::SGET_SHORT,
+    Instruction::SPUT,
+    // Instruction::SPUT_WIDE,
+    Instruction::SPUT_OBJECT,
+    Instruction::SPUT_BOOLEAN,
+    Instruction::SPUT_BYTE,
+    Instruction::SPUT_CHAR,
+    Instruction::SPUT_SHORT,
+    Instruction::INVOKE_VIRTUAL,
+    Instruction::INVOKE_SUPER,
+    Instruction::INVOKE_DIRECT,
+    Instruction::INVOKE_STATIC,
+    Instruction::INVOKE_INTERFACE,
+    // Instruction::RETURN_VOID_BARRIER,
+    // Instruction::INVOKE_VIRTUAL_RANGE,
+    // Instruction::INVOKE_SUPER_RANGE,
+    // Instruction::INVOKE_DIRECT_RANGE,
+    // Instruction::INVOKE_STATIC_RANGE,
+    // Instruction::INVOKE_INTERFACE_RANGE,
+    // Instruction::UNUSED_79,
+    // Instruction::UNUSED_7A,
+    // Instruction::NEG_INT,
+    // Instruction::NOT_INT,
+    // Instruction::NEG_LONG,
+    // Instruction::NOT_LONG,
+    // Instruction::NEG_FLOAT,
+    // Instruction::NEG_DOUBLE,
+    // Instruction::INT_TO_LONG,
+    // Instruction::INT_TO_FLOAT,
+    // Instruction::INT_TO_DOUBLE,
+    // Instruction::LONG_TO_INT,
+    // Instruction::LONG_TO_FLOAT,
+    // Instruction::LONG_TO_DOUBLE,
+    // Instruction::FLOAT_TO_INT,
+    // Instruction::FLOAT_TO_LONG,
+    // Instruction::FLOAT_TO_DOUBLE,
+    // Instruction::DOUBLE_TO_INT,
+    // Instruction::DOUBLE_TO_LONG,
+    // Instruction::DOUBLE_TO_FLOAT,
+    // Instruction::INT_TO_BYTE,
+    // Instruction::INT_TO_CHAR,
+    // Instruction::INT_TO_SHORT,
+    // Instruction::ADD_INT,
+    // Instruction::SUB_INT,
+    // Instruction::MUL_INT,
+    // Instruction::DIV_INT,
+    // Instruction::REM_INT,
+    // Instruction::AND_INT,
+    // Instruction::OR_INT,
+    // Instruction::XOR_INT,
+    // Instruction::SHL_INT,
+    // Instruction::SHR_INT,
+    // Instruction::USHR_INT,
+    // Instruction::ADD_LONG,
+    // Instruction::SUB_LONG,
+    // Instruction::MUL_LONG,
+    // Instruction::DIV_LONG,
+    // Instruction::REM_LONG,
+    // Instruction::AND_LONG,
+    // Instruction::OR_LONG,
+    // Instruction::XOR_LONG,
+    // Instruction::SHL_LONG,
+    // Instruction::SHR_LONG,
+    // Instruction::USHR_LONG,
+    // Instruction::ADD_FLOAT,
+    // Instruction::SUB_FLOAT,
+    // Instruction::MUL_FLOAT,
+    // Instruction::DIV_FLOAT,
+    // Instruction::REM_FLOAT,
+    // Instruction::ADD_DOUBLE,
+    // Instruction::SUB_DOUBLE,
+    // Instruction::MUL_DOUBLE,
+    // Instruction::DIV_DOUBLE,
+    // Instruction::REM_DOUBLE,
+    // Instruction::ADD_INT_2ADDR,
+    // Instruction::SUB_INT_2ADDR,
+    // Instruction::MUL_INT_2ADDR,
+    // Instruction::DIV_INT_2ADDR,
+    // Instruction::REM_INT_2ADDR,
+    // Instruction::AND_INT_2ADDR,
+    // Instruction::OR_INT_2ADDR,
+    // Instruction::XOR_INT_2ADDR,
+    // Instruction::SHL_INT_2ADDR,
+    // Instruction::SHR_INT_2ADDR,
+    // Instruction::USHR_INT_2ADDR,
+    // Instruction::ADD_LONG_2ADDR,
+    // Instruction::SUB_LONG_2ADDR,
+    // Instruction::MUL_LONG_2ADDR,
+    // Instruction::DIV_LONG_2ADDR,
+    // Instruction::REM_LONG_2ADDR,
+    // Instruction::AND_LONG_2ADDR,
+    // Instruction::OR_LONG_2ADDR,
+    // Instruction::XOR_LONG_2ADDR,
+    // Instruction::SHL_LONG_2ADDR,
+    // Instruction::SHR_LONG_2ADDR,
+    // Instruction::USHR_LONG_2ADDR,
+    // Instruction::ADD_FLOAT_2ADDR,
+    // Instruction::SUB_FLOAT_2ADDR,
+    // Instruction::MUL_FLOAT_2ADDR,
+    // Instruction::DIV_FLOAT_2ADDR,
+    // Instruction::REM_FLOAT_2ADDR,
+    // Instruction::ADD_DOUBLE_2ADDR,
+    // Instruction::SUB_DOUBLE_2ADDR,
+    // Instruction::MUL_DOUBLE_2ADDR,
+    // Instruction::DIV_DOUBLE_2ADDR,
+    // Instruction::REM_DOUBLE_2ADDR,
+    // Instruction::ADD_INT_LIT16,
+    // Instruction::RSUB_INT,
+    // Instruction::MUL_INT_LIT16,
+    // Instruction::DIV_INT_LIT16,
+    // Instruction::REM_INT_LIT16,
+    // Instruction::AND_INT_LIT16,
+    // Instruction::OR_INT_LIT16,
+    // Instruction::XOR_INT_LIT16,
+    // Instruction::ADD_INT_LIT8,
+    // Instruction::RSUB_INT_LIT8,
+    // Instruction::MUL_INT_LIT8,
+    // Instruction::DIV_INT_LIT8,
+    // Instruction::REM_INT_LIT8,
+    // Instruction::AND_INT_LIT8,
+    // Instruction::OR_INT_LIT8,
+    // Instruction::XOR_INT_LIT8,
+    // Instruction::SHL_INT_LIT8,
+    // Instruction::SHR_INT_LIT8,
+    // Instruction::USHR_INT_LIT8,
+    // Instruction::IGET_QUICK,
+    // Instruction::IGET_WIDE_QUICK,
+    // Instruction::IGET_OBJECT_QUICK,
+    // Instruction::IPUT_QUICK,
+    // Instruction::IPUT_WIDE_QUICK,
+    // Instruction::IPUT_OBJECT_QUICK,
+    // Instruction::INVOKE_VIRTUAL_QUICK,
+    // Instruction::INVOKE_VIRTUAL_RANGE_QUICK,
+    // Instruction::UNUSED_EB,
+    // Instruction::UNUSED_EC,
+    // Instruction::UNUSED_ED,
+    // Instruction::UNUSED_EE,
+    // Instruction::UNUSED_EF,
+    // Instruction::UNUSED_F0,
+    // Instruction::UNUSED_F1,
+    // Instruction::UNUSED_F2,
+    // Instruction::UNUSED_F3,
+    // Instruction::UNUSED_F4,
+    // Instruction::UNUSED_F5,
+    // Instruction::UNUSED_F6,
+    // Instruction::UNUSED_F7,
+    // Instruction::UNUSED_F8,
+    // Instruction::UNUSED_F9,
+    // Instruction::UNUSED_FA,
+    // Instruction::UNUSED_FB,
+    // Instruction::UNUSED_FC,
+    // Instruction::UNUSED_FD,
+    // Instruction::UNUSED_FE,
+    // Instruction::UNUSED_FF,
+
+    // ----- ExtendedMIROpcode -----
+    // kMirOpPhi,
+    // kMirOpCopy,
+    // kMirOpFusedCmplFloat,
+    // kMirOpFusedCmpgFloat,
+    // kMirOpFusedCmplDouble,
+    // kMirOpFusedCmpgDouble,
+    // kMirOpFusedCmpLong,
+    // kMirOpNop,
+    // kMirOpNullCheck,
+    // kMirOpRangeCheck,
+    // kMirOpDivZeroCheck,
+    // kMirOpCheck,
+    // kMirOpCheckPart2,
+    // kMirOpSelect,
+    // kMirOpLast,
+};
+
+// Z : boolean
+// B : byte
+// S : short
+// C : char
+// I : int
+// J : long
+// F : float
+// D : double
+// L : reference(object, array)
+// V : void
+// (ARM64) The current calling convention only supports 32-bit softfp,
+//         which has problems with long, float, and double.
+constexpr char arm64_supported_types[] = "ZBSCILVJ";
+// (x86_64) We still have trouble compiling longs/doubles/floats.
+constexpr char x86_64_supported_types[] = "ZBSCILV";
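+// For example, a method "int max(int a, int b)" has the shorty "III" (return type first,
+// then parameter types) and "void set(Object o)" has the shorty "VL"; any shorty containing
+// 'F' or 'D' is currently rejected on both back-ends above.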
+
+// TODO: Remove this when we are able to compile everything.
+static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) {
+  uint32_t shorty_size = strlen(shorty);
+  CHECK_GE(shorty_size, 1u);
+  // Set a limit on the maximum number of parameters.
+  // Note: there is an implied "method*" parameter, and probably "this" as well.
+  // 1 is for the return type. Currently, we accept at most 2 parameters.
+  // (x86_64): For now we have the same limitation. But we might want to split this
+  //           check into two separate cases for arm64 and x86_64 in the future.
+  if (shorty_size > (1 + 2)) {
+    return false;
+  }
+
+  const char* supported_types = arm64_supported_types;
+  if (instruction_set == kX86_64) {
+    supported_types = x86_64_supported_types;
+  }
+  for (uint32_t i = 0; i < shorty_size; i++) {
+    if (strchr(supported_types, shorty[i]) == nullptr) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// TODO: Remove this when we are able to compile everything.
+// Skip methods that we do not currently support.
+static bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file,
+                             CompilationUnit& cu) {
+  // There are some limitations with the current ARM64 and x86_64 backends.
+  if (cu.instruction_set == kArm64 || cu.instruction_set == kX86_64) {
+    // Check if we can compile the prototype.
+    const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
+    if (!CanCompileShorty(shorty, cu.instruction_set)) {
+      VLOG(compiler) << "Unsupported shorty : " << shorty;
+      return false;
+    }
+
+    const int *support_list = arm64_support_list;
+    int support_list_size = arraysize(arm64_support_list);
+    if (cu.instruction_set == kX86_64) {
+      support_list = x86_64_support_list;
+      support_list_size = arraysize(x86_64_support_list);
+    }
+
+    for (int idx = 0; idx < cu.mir_graph->GetNumBlocks(); idx++) {
+      BasicBlock* bb = cu.mir_graph->GetBasicBlock(idx);
+      if (bb == nullptr) continue;
+      if (bb->block_type == kDead) continue;
+      for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
+        int opcode = mir->dalvikInsn.opcode;
+        // Check if we support the byte code.
+        if (std::find(support_list, support_list + support_list_size,
+            opcode) == support_list + support_list_size) {
+          if (opcode < kMirOpFirst) {
+            VLOG(compiler) << "Unsupported dalvik byte code : "
+                           << mir->dalvikInsn.opcode;
+          } else {
+            VLOG(compiler) << "Unsupported extended MIR opcode : "
+                           << MIRGraph::extended_mir_op_names_[opcode - kMirOpFirst];
+          }
+          return false;
+        }
+        // Check if it invokes a prototype that we cannot support.
+        if (Instruction::INVOKE_VIRTUAL == opcode ||
+            Instruction::INVOKE_SUPER == opcode ||
+            Instruction::INVOKE_DIRECT == opcode ||
+            Instruction::INVOKE_STATIC == opcode ||
+            Instruction::INVOKE_INTERFACE == opcode) {
+          uint32_t invoke_method_idx = mir->dalvikInsn.vB;
+          const char* invoke_method_shorty = dex_file.GetMethodShorty(
+              dex_file.GetMethodId(invoke_method_idx));
+          if (!CanCompileShorty(invoke_method_shorty, cu.instruction_set)) {
+            VLOG(compiler) << "Unsupported to invoke '"
+                           << PrettyMethod(invoke_method_idx, dex_file)
+                           << "' with shorty : " << invoke_method_shorty;
+            return false;
+          }
+        }
+      }
+    }
+
+    LOG(INFO) << "Using experimental instruction set A64 for "
+              << PrettyMethod(method_idx, dex_file);
+  }
+  return true;
+}
+
 static CompiledMethod* CompileMethod(CompilerDriver& driver,
                                      Compiler* compiler,
                                      const DexFile::CodeItem* code_item,
@@ -162,6 +822,7 @@
   cu.compiler = compiler;
   // TODO: x86_64 & arm64 are not yet implemented.
   CHECK((cu.instruction_set == kThumb2) ||
+        (cu.instruction_set == kArm64) ||
         (cu.instruction_set == kX86) ||
         (cu.instruction_set == kX86_64) ||
         (cu.instruction_set == kMips));
@@ -192,6 +853,10 @@
     }
   }
 
+  if (cu.verbose) {
+    cu.enable_debug |= (1 << kDebugCodegenDump);
+  }
+
   /*
    * TODO: rework handling of optimization and debug flags.  Should we split out
    * MIR and backend flags?  Need command-line setting as well.
@@ -214,6 +879,13 @@
         (1 << kPromoteCompilerTemps));
   }
 
+  if (cu.instruction_set == kArm64 || cu.instruction_set == kX86_64) {
+    // TODO(Arm64): enable optimizations once backend is mature enough.
+    // TODO(X86_64): enable optimizations once backend is mature enough.
+    cu.disable_opt = ~(uint32_t)0;
+    cu.enable_debug |= (1 << kDebugCodegenDump);
+  }
+
   cu.StartTimingSplit("BuildMIRGraph");
   cu.mir_graph.reset(new MIRGraph(&cu, &cu.arena));
 
@@ -241,13 +913,19 @@
   cu.mir_graph->InlineMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx,
                               class_loader, dex_file);
 
+  // TODO(Arm64): Remove this when we are able to compile everything.
+  if (!CanCompileMethod(method_idx, dex_file, cu)) {
+    VLOG(compiler) << "Cannot compile method : " << PrettyMethod(method_idx, dex_file);
+    return nullptr;
+  }
+
   cu.NewTimingSplit("MIROpt:CheckFilters");
   if (cu.mir_graph->SkipCompilation()) {
     return NULL;
   }
 
   /* Create the pass driver and launch it */
-  PassDriver pass_driver(&cu);
+  PassDriverME pass_driver(&cu);
   pass_driver.Launch();
 
   if (cu.enable_debug & (1 << kDebugDumpCheckStats)) {
diff --git a/compiler/dex/frontend.h b/compiler/dex/frontend.h
index f714ecd..9e376ee 100644
--- a/compiler/dex/frontend.h
+++ b/compiler/dex/frontend.h
@@ -76,7 +76,8 @@
   kDebugVerifyBitcode,
   kDebugShowSummaryMemoryUsage,
   kDebugShowFilterStats,
-  kDebugTimings
+  kDebugTimings,
+  kDebugCodegenDump
 };
 
 class LLVMInfo {
@@ -101,10 +102,10 @@
     }
 
   private:
-    UniquePtr< ::llvm::LLVMContext> llvm_context_;
+    std::unique_ptr< ::llvm::LLVMContext> llvm_context_;
     ::llvm::Module* llvm_module_;  // Managed by context_.
-    UniquePtr<art::llvm::IntrinsicHelper> intrinsic_helper_;
-    UniquePtr<art::llvm::IRBuilder> ir_builder_;
+    std::unique_ptr<art::llvm::IntrinsicHelper> intrinsic_helper_;
+    std::unique_ptr<art::llvm::IRBuilder> ir_builder_;
 };
 
 class CompiledMethod;
diff --git a/compiler/dex/local_value_numbering.h b/compiler/dex/local_value_numbering.h
index 535b613..0c2b6a7 100644
--- a/compiler/dex/local_value_numbering.h
+++ b/compiler/dex/local_value_numbering.h
@@ -17,9 +17,11 @@
 #ifndef ART_COMPILER_DEX_LOCAL_VALUE_NUMBERING_H_
 #define ART_COMPILER_DEX_LOCAL_VALUE_NUMBERING_H_
 
+#include <memory>
+
 #include "compiler_internals.h"
-#include "UniquePtr.h"
 #include "utils/scoped_arena_allocator.h"
+#include "utils/scoped_arena_containers.h"
 
 #define NO_VALUE 0xffff
 #define ARRAY_REF 0xfffe
@@ -75,24 +77,20 @@
   };
 
   // Key is s_reg, value is value name.
-  typedef SafeMap<uint16_t, uint16_t, std::less<uint16_t>,
-      ScopedArenaAllocatorAdapter<std::pair<uint16_t, uint16_t> > > SregValueMap;
+  typedef ScopedArenaSafeMap<uint16_t, uint16_t> SregValueMap;
   // Key is concatenation of opcode, operand1, operand2 and modifier, value is value name.
-  typedef SafeMap<uint64_t, uint16_t, std::less<uint64_t>,
-      ScopedArenaAllocatorAdapter<std::pair<uint64_t, uint16_t> > > ValueMap;
+  typedef ScopedArenaSafeMap<uint64_t, uint16_t> ValueMap;
   // Key represents a memory address, value is generation.
-  typedef SafeMap<MemoryVersionKey, uint16_t, MemoryVersionKeyComparator,
-      ScopedArenaAllocatorAdapter<std::pair<MemoryVersionKey, uint16_t> > > MemoryVersionMap;
+  typedef ScopedArenaSafeMap<MemoryVersionKey, uint16_t, MemoryVersionKeyComparator
+      > MemoryVersionMap;
   // Maps field key to field id for resolved fields.
-  typedef SafeMap<FieldReference, uint32_t, FieldReferenceComparator,
-      ScopedArenaAllocatorAdapter<std::pair<FieldReference, uint16_t> > > FieldIndexMap;
+  typedef ScopedArenaSafeMap<FieldReference, uint32_t, FieldReferenceComparator> FieldIndexMap;
   // A set of value names.
-  typedef std::set<uint16_t, std::less<uint16_t>,
-      ScopedArenaAllocatorAdapter<uint16_t> > ValueNameSet;
+  typedef ScopedArenaSet<uint16_t> ValueNameSet;
 
  public:
   static LocalValueNumbering* Create(CompilationUnit* cu) {
-    UniquePtr<ScopedArenaAllocator> allocator(ScopedArenaAllocator::Create(&cu->arena_stack));
+    std::unique_ptr<ScopedArenaAllocator> allocator(ScopedArenaAllocator::Create(&cu->arena_stack));
     void* addr = allocator->Alloc(sizeof(LocalValueNumbering), kArenaAllocMisc);
     return new(addr) LocalValueNumbering(cu, allocator.release());
   }
@@ -198,7 +196,7 @@
   void HandlePutObject(MIR* mir);
 
   CompilationUnit* const cu_;
-  UniquePtr<ScopedArenaAllocator> allocator_;
+  std::unique_ptr<ScopedArenaAllocator> allocator_;
   SregValueMap sreg_value_map_;
   SregValueMap sreg_wide_value_map_;
   ValueMap value_map_;
diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc
index ebac871..e56e016 100644
--- a/compiler/dex/local_value_numbering_test.cc
+++ b/compiler/dex/local_value_numbering_test.cc
@@ -144,7 +144,6 @@
       mir->ssa_rep->fp_def = nullptr;  // Not used by LVN.
       mir->dalvikInsn.opcode = def->opcode;
       mir->offset = i;  // LVN uses offset only for debug output
-      mir->width = 1u;  // Not used by LVN.
       mir->optimization_flags = 0u;
 
       if (i != 0u) {
@@ -181,7 +180,7 @@
   MIR* mirs_;
   std::vector<SSARepresentation> ssa_reps_;
   std::vector<uint16_t> value_names_;
-  UniquePtr<LocalValueNumbering> lvn_;
+  std::unique_ptr<LocalValueNumbering> lvn_;
 };
 
 TEST_F(LocalValueNumberingTest, TestIGetIGetInvokeIGet) {
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index 200795e..508f1c7 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -15,6 +15,8 @@
  */
 
 #include <algorithm>
+#include <memory>
+
 #include "compiler_internals.h"
 #include "dataflow_iterator-inl.h"
 #include "dex_instruction.h"
@@ -23,7 +25,7 @@
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_options.h"
-#include "UniquePtr.h"
+#include "utils/scoped_arena_containers.h"
 
 namespace art {
 
@@ -1205,17 +1207,16 @@
     MethodReferenceComparator devirt_cmp;
   };
 
-  // Map invoke key (see MapEntry) to lowering info index.
-  typedef std::set<MapEntry, MapEntryComparator, ScopedArenaAllocatorAdapter<MapEntry> > InvokeMap;
-
   ScopedArenaAllocator allocator(&cu_->arena_stack);
 
   // All INVOKE instructions take 3 code units and there must also be a RETURN.
   uint32_t max_refs = (current_code_item_->insns_size_in_code_units_ - 1u) / 3u;
 
+  // Map invoke key (see MapEntry) to lowering info index and vice versa.
   // The invoke_map and sequential entries are essentially equivalent to Boost.MultiIndex's
   // multi_index_container with one ordered index and one sequential index.
-  InvokeMap invoke_map(MapEntryComparator(), allocator.Adapter());
+  ScopedArenaSet<MapEntry, MapEntryComparator> invoke_map(MapEntryComparator(),
+                                                          allocator.Adapter());
   const MapEntry** sequential_entries = reinterpret_cast<const MapEntry**>(
       allocator.Alloc(max_refs * sizeof(sequential_entries[0]), kArenaAllocMisc));
 
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index 36f1be7..ed7e1f5 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -879,8 +879,8 @@
       new (arena_) ArenaBitVector(arena_, cu_->num_dalvik_registers, false, kBitMapLiveIn);
 
   for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
-    uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
-    DecodedInstruction *d_insn = &mir->dalvikInsn;
+    uint64_t df_attributes = GetDataFlowAttributes(mir);
+    MIR::DecodedInstruction* d_insn = &mir->dalvikInsn;
 
     if (df_attributes & DF_HAS_USES) {
       if (df_attributes & DF_UA) {
@@ -925,11 +925,17 @@
 int MIRGraph::AddNewSReg(int v_reg) {
   // Compiler temps always have a subscript of 0
   int subscript = (v_reg < 0) ? 0 : ++ssa_last_defs_[v_reg];
-  int ssa_reg = GetNumSSARegs();
+  uint32_t ssa_reg = GetNumSSARegs();
   SetNumSSARegs(ssa_reg + 1);
   ssa_base_vregs_->Insert(v_reg);
   ssa_subscripts_->Insert(subscript);
   DCHECK_EQ(ssa_base_vregs_->Size(), ssa_subscripts_->Size());
+  // If we are expanding very late, update use counts too.
+  if (ssa_reg > 0 && use_counts_.Size() == ssa_reg) {
+    // Need to expand the counts.
+    use_counts_.Insert(0);
+    raw_use_counts_.Insert(0);
+  }
   return ssa_reg;
 }
 
@@ -949,7 +955,7 @@
 
 /* Look up new SSA names for format_35c instructions */
 void MIRGraph::DataFlowSSAFormat35C(MIR* mir) {
-  DecodedInstruction *d_insn = &mir->dalvikInsn;
+  MIR::DecodedInstruction* d_insn = &mir->dalvikInsn;
   int num_uses = d_insn->vA;
   int i;
 
@@ -967,7 +973,7 @@
 
 /* Look up new SSA names for format_3rc instructions */
 void MIRGraph::DataFlowSSAFormat3RC(MIR* mir) {
-  DecodedInstruction *d_insn = &mir->dalvikInsn;
+  MIR::DecodedInstruction* d_insn = &mir->dalvikInsn;
   int num_uses = d_insn->vA;
   int i;
 
@@ -994,7 +1000,7 @@
         static_cast<struct SSARepresentation *>(arena_->Alloc(sizeof(SSARepresentation),
                                                               kArenaAllocDFInfo));
 
-    uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+    uint64_t df_attributes = GetDataFlowAttributes(mir);
 
       // If not a pseudo-op, note non-leaf or can throw
     if (static_cast<int>(mir->dalvikInsn.opcode) <
@@ -1064,7 +1070,7 @@
                                                               kArenaAllocDFInfo));
     }
 
-    DecodedInstruction *d_insn = &mir->dalvikInsn;
+    MIR::DecodedInstruction* d_insn = &mir->dalvikInsn;
 
     if (df_attributes & DF_HAS_USES) {
       num_uses = 0;
@@ -1252,7 +1258,7 @@
       use_counts_.Put(s_reg, use_counts_.Get(s_reg) + weight);
     }
     if (!(cu_->disable_opt & (1 << kPromoteCompilerTemps))) {
-      uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+      uint64_t df_attributes = GetDataFlowAttributes(mir);
       // Implicit use of Method* ? */
       if (df_attributes & DF_UMS) {
         /*
diff --git a/compiler/dex/mir_field_info.cc b/compiler/dex/mir_field_info.cc
index 7c630e8..98866d9 100644
--- a/compiler/dex/mir_field_info.cc
+++ b/compiler/dex/mir_field_info.cc
@@ -21,10 +21,10 @@
 #include "base/logging.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_driver-inl.h"
-#include "mirror/class_loader.h"  // Only to allow casts in SirtRef<ClassLoader>.
-#include "mirror/dex_cache.h"     // Only to allow casts in SirtRef<DexCache>.
+#include "mirror/class_loader.h"  // Only to allow casts in Handle<ClassLoader>.
+#include "mirror/dex_cache.h"     // Only to allow casts in Handle<DexCache>.
 #include "scoped_thread_state_change.h"
-#include "sirt_ref-inl.h"
+#include "handle_scope-inl.h"
 
 namespace art {
 
@@ -43,11 +43,12 @@
   // We're going to resolve fields and check access in a tight loop. It's better to hold
   // the lock and needed references once than re-acquiring them again and again.
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::DexCache> dex_cache(soa.Self(), compiler_driver->GetDexCache(mUnit));
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
-      compiler_driver->GetClassLoader(soa, mUnit));
-  SirtRef<mirror::Class> referrer_class(soa.Self(),
-      compiler_driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, mUnit));
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(compiler_driver->GetDexCache(mUnit)));
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(compiler_driver->GetClassLoader(soa, mUnit)));
+  Handle<mirror::Class> referrer_class(hs.NewHandle(
+      compiler_driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, mUnit)));
   // Even if the referrer class is unresolved (i.e. we're compiling a method without class
   // definition) we still want to resolve fields and record all available info.
 
@@ -63,7 +64,7 @@
     bool is_volatile = compiler_driver->IsFieldVolatile(resolved_field);
 
     std::pair<bool, bool> fast_path = compiler_driver->IsFastInstanceField(
-        dex_cache.get(), referrer_class.get(), resolved_field, field_idx, &it->field_offset_);
+        dex_cache.Get(), referrer_class.Get(), resolved_field, field_idx, &it->field_offset_);
     it->flags_ = 0u |  // Without kFlagIsStatic.
         (is_volatile ? kFlagIsVolatile : 0u) |
         (fast_path.first ? kFlagFastGet : 0u) |
@@ -89,11 +90,12 @@
   // We're going to resolve fields and check access in a tight loop. It's better to hold
   // the lock and needed references once than re-acquiring them again and again.
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::DexCache> dex_cache(soa.Self(), compiler_driver->GetDexCache(mUnit));
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
-      compiler_driver->GetClassLoader(soa, mUnit));
-  SirtRef<mirror::Class> referrer_class(soa.Self(),
-      compiler_driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, mUnit));
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(compiler_driver->GetDexCache(mUnit)));
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(compiler_driver->GetClassLoader(soa, mUnit)));
+  Handle<mirror::Class> referrer_class(hs.NewHandle(
+      compiler_driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, mUnit)));
   // Even if the referrer class is unresolved (i.e. we're compiling a method without class
   // definition) we still want to resolve fields and record all available info.
 
@@ -110,7 +112,7 @@
 
     bool is_referrers_class, is_initialized;
     std::pair<bool, bool> fast_path = compiler_driver->IsFastStaticField(
-        dex_cache.get(), referrer_class.get(), resolved_field, field_idx, &it->field_offset_,
+        dex_cache.Get(), referrer_class.Get(), resolved_field, field_idx, &it->field_offset_,
         &it->storage_index_, &is_referrers_class, &is_initialized);
     it->flags_ = kFlagIsStatic |
         (is_volatile ? kFlagIsVolatile : 0u) |
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 8ce4f1f..24fea71 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -17,10 +17,12 @@
 #include "mir_graph.h"
 
 #include <inttypes.h>
+#include <queue>
 
 #include "base/stl_util.h"
 #include "compiler_internals.h"
 #include "dex_file-inl.h"
+#include "dex_instruction-inl.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "leb128.h"
@@ -44,6 +46,20 @@
   "Check1",
   "Check2",
   "Select",
+  "ConstVector",
+  "MoveVector",
+  "PackedMultiply",
+  "PackedAddition",
+  "PackedSubtract",
+  "PackedShiftLeft",
+  "PackedSignedShiftRight",
+  "PackedUnsignedShiftRight",
+  "PackedAnd",
+  "PackedOr",
+  "PackedXor",
+  "PackedAddReduce",
+  "PackedReduce",
+  "PackedSet",
 };
 
 MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena)
@@ -61,9 +77,9 @@
       dfs_order_(NULL),
       dfs_post_order_(NULL),
       dom_post_order_traversal_(NULL),
+      topological_order_(nullptr),
       i_dom_list_(NULL),
       def_block_matrix_(NULL),
-      temp_dalvik_register_v_(NULL),
       temp_scoped_alloc_(),
       temp_insn_data_(nullptr),
       temp_bit_vector_size_(0u),
@@ -106,11 +122,17 @@
 /*
  * Parse an instruction, return the length of the instruction
  */
-int MIRGraph::ParseInsn(const uint16_t* code_ptr, DecodedInstruction* decoded_instruction) {
-  const Instruction* instruction = Instruction::At(code_ptr);
-  *decoded_instruction = DecodedInstruction(instruction);
-
-  return instruction->SizeInCodeUnits();
+int MIRGraph::ParseInsn(const uint16_t* code_ptr, MIR::DecodedInstruction* decoded_instruction) {
+  const Instruction* inst = Instruction::At(code_ptr);
+  decoded_instruction->opcode = inst->Opcode();
+  decoded_instruction->vA = inst->HasVRegA() ? inst->VRegA() : 0;
+  decoded_instruction->vB = inst->HasVRegB() ? inst->VRegB() : 0;
+  decoded_instruction->vB_wide = inst->HasWideVRegB() ? inst->WideVRegB() : 0;
+  decoded_instruction->vC = inst->HasVRegC() ?  inst->VRegC() : 0;
+  if (inst->HasVarArgs()) {
+    inst->GetVarArgs(decoded_instruction->arg);
+  }
+  return inst->SizeInCodeUnits();
 }
 
 
@@ -128,7 +150,7 @@
   if (insn == NULL) {
     LOG(FATAL) << "Break split failed";
   }
-  BasicBlock *bottom_block = NewMemBB(kDalvikByteCode, num_blocks_++);
+  BasicBlock* bottom_block = NewMemBB(kDalvikByteCode, num_blocks_++);
   block_list_.Insert(bottom_block);
 
   bottom_block->start_offset = code_offset;
@@ -166,16 +188,16 @@
     orig_block->successor_blocks = NULL;
     GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bottom_block->successor_blocks);
     while (true) {
-      SuccessorBlockInfo *successor_block_info = iterator.Next();
+      SuccessorBlockInfo* successor_block_info = iterator.Next();
       if (successor_block_info == NULL) break;
-      BasicBlock *bb = GetBasicBlock(successor_block_info->block);
+      BasicBlock* bb = GetBasicBlock(successor_block_info->block);
       bb->predecessors->Delete(orig_block->id);
       bb->predecessors->Insert(bottom_block->id);
     }
   }
 
   orig_block->last_mir_insn = prev;
-  prev->next = NULL;
+  prev->next = nullptr;
 
   /*
    * Update the immediate predecessor block pointer so that outgoing edges
@@ -199,6 +221,7 @@
   while (p != bottom_block->last_mir_insn) {
     p = p->next;
     DCHECK(p != nullptr);
+    p->bb = bottom_block->id;
     int opcode = p->dalvikInsn.opcode;
     /*
      * Some messiness here to ensure that we only enter real opcodes and only the
@@ -275,7 +298,7 @@
     }
   }
 
-  // Iterate over each of the handlers to enqueue the empty Catch blocks
+  // Iterate over each of the handlers to enqueue the empty Catch blocks.
   const byte* handlers_ptr = DexFile::GetCatchHandlerData(*current_code_item_, 0);
   uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_ptr);
   for (uint32_t idx = 0; idx < handlers_size; idx++) {
@@ -322,7 +345,7 @@
       LOG(FATAL) << "Unexpected opcode(" << insn->dalvikInsn.opcode << ") with kBranch set";
   }
   CountBranch(target);
-  BasicBlock *taken_block = FindBlock(target, /* split */ true, /* create */ true,
+  BasicBlock* taken_block = FindBlock(target, /* split */ true, /* create */ true,
                                       /* immed_pred_block_p */ &cur_block);
   cur_block->taken = taken_block->id;
   taken_block->predecessors->Insert(cur_block->id);
@@ -382,7 +405,7 @@
     size = switch_data[1];
     first_key = switch_data[2] | (switch_data[3] << 16);
     target_table = reinterpret_cast<const int*>(&switch_data[4]);
-    keyTable = NULL;        // Make the compiler happy
+    keyTable = NULL;        // Make the compiler happy.
   /*
    * Sparse switch data format:
    *  ushort ident = 0x0200   magic value
@@ -398,7 +421,7 @@
     size = switch_data[1];
     keyTable = reinterpret_cast<const int*>(&switch_data[2]);
     target_table = reinterpret_cast<const int*>(&switch_data[2 + size*2]);
-    first_key = 0;   // To make the compiler happy
+    first_key = 0;   // To make the compiler happy.
   }
 
   if (cur_block->successor_block_list_type != kNotUsed) {
@@ -411,9 +434,9 @@
       new (arena_) GrowableArray<SuccessorBlockInfo*>(arena_, size, kGrowableArraySuccessorBlocks);
 
   for (i = 0; i < size; i++) {
-    BasicBlock *case_block = FindBlock(cur_offset + target_table[i], /* split */ true,
+    BasicBlock* case_block = FindBlock(cur_offset + target_table[i], /* split */ true,
                                       /* create */ true, /* immed_pred_block_p */ &cur_block);
-    SuccessorBlockInfo *successor_block_info =
+    SuccessorBlockInfo* successor_block_info =
         static_cast<SuccessorBlockInfo*>(arena_->Alloc(sizeof(SuccessorBlockInfo),
                                                        kArenaAllocSuccessor));
     successor_block_info->block = case_block->id;
@@ -456,13 +479,13 @@
         new (arena_) GrowableArray<SuccessorBlockInfo*>(arena_, 2, kGrowableArraySuccessorBlocks);
 
     for (; iterator.HasNext(); iterator.Next()) {
-      BasicBlock *catch_block = FindBlock(iterator.GetHandlerAddress(), false /* split*/,
+      BasicBlock* catch_block = FindBlock(iterator.GetHandlerAddress(), false /* split*/,
                                          false /* creat */, NULL  /* immed_pred_block_p */);
       catch_block->catch_entry = true;
       if (kIsDebugBuild) {
         catches_.insert(catch_block->start_offset);
       }
-      SuccessorBlockInfo *successor_block_info = reinterpret_cast<SuccessorBlockInfo*>
+      SuccessorBlockInfo* successor_block_info = reinterpret_cast<SuccessorBlockInfo*>
           (arena_->Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor));
       successor_block_info->block = catch_block->id;
       successor_block_info->key = iterator.GetHandlerTypeIndex();
@@ -470,7 +493,7 @@
       catch_block->predecessors->Insert(cur_block->id);
     }
   } else if (build_all_edges) {
-    BasicBlock *eh_block = NewMemBB(kExceptionHandling, num_blocks_++);
+    BasicBlock* eh_block = NewMemBB(kExceptionHandling, num_blocks_++);
     cur_block->taken = eh_block->id;
     block_list_.Insert(eh_block);
     eh_block->start_offset = cur_offset;
@@ -480,7 +503,7 @@
   if (is_throw) {
     cur_block->explicit_throw = true;
     if (code_ptr < code_end) {
-      // Force creation of new block following THROW via side-effect
+      // Force creation of new block following THROW via side-effect.
       FindBlock(cur_offset + width, /* split */ false, /* create */ true,
                 /* immed_pred_block_p */ NULL);
     }
@@ -522,11 +545,11 @@
   new_block->start_offset = insn->offset;
   cur_block->fall_through = new_block->id;
   new_block->predecessors->Insert(cur_block->id);
-  MIR* new_insn = static_cast<MIR*>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR));
+  MIR* new_insn = NewMIR();
   *new_insn = *insn;
   insn->dalvikInsn.opcode =
       static_cast<Instruction::Code>(kMirOpCheck);
-  // Associate the two halves
+  // Associate the two halves.
   insn->meta.throw_insn = new_insn;
   new_block->AppendMIR(new_insn);
   return new_block;
@@ -593,7 +616,7 @@
   }
 
   /* Current block to record parsed instructions */
-  BasicBlock *cur_block = NewMemBB(kDalvikByteCode, num_blocks_++);
+  BasicBlock* cur_block = NewMemBB(kDalvikByteCode, num_blocks_++);
   DCHECK_EQ(current_offset_, 0U);
   cur_block->start_offset = current_offset_;
   block_list_.Insert(cur_block);
@@ -608,11 +631,10 @@
 
   /* Parse all instructions and put them into containing basic blocks */
   while (code_ptr < code_end) {
-    MIR *insn = static_cast<MIR *>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR));
+    MIR *insn = NewMIR();
     insn->offset = current_offset_;
     insn->m_unit_index = current_method_;
     int width = ParseInsn(code_ptr, &insn->dalvikInsn);
-    insn->width = width;
     Instruction::Code opcode = insn->dalvikInsn.opcode;
     if (opcode_count_ != NULL) {
       opcode_count_[static_cast<int>(opcode)]++;
@@ -621,7 +643,7 @@
     int flags = Instruction::FlagsOf(insn->dalvikInsn.opcode);
     int verify_flags = Instruction::VerifyFlagsOf(insn->dalvikInsn.opcode);
 
-    uint64_t df_flags = oat_data_flow_attributes_[insn->dalvikInsn.opcode];
+    uint64_t df_flags = GetDataFlowAttributes(insn);
     merged_df_flags |= df_flags;
 
     if (df_flags & DF_HAS_DEFS) {
@@ -632,7 +654,7 @@
       cur_block->use_lvn = true;  // Run local value numbering on this basic block.
     }
 
-    // Check for inline data block signatures
+    // Check for inline data block signatures.
     if (opcode == Instruction::NOP) {
       // A simple NOP will have a width of 1 at this point, embedded data NOP > 1.
       if ((width == 1) && ((current_offset_ & 0x1) == 0x1) && ((code_end - code_ptr) > 1)) {
@@ -743,6 +765,17 @@
   }
 }
 
+uint64_t MIRGraph::GetDataFlowAttributes(Instruction::Code opcode) {
+  DCHECK_LT(static_cast<size_t>(opcode),
+            sizeof(oat_data_flow_attributes_) / sizeof(oat_data_flow_attributes_[0]));
+  return oat_data_flow_attributes_[opcode];
+}
+
+uint64_t MIRGraph::GetDataFlowAttributes(MIR* mir) {
+  DCHECK(mir != nullptr);
+  Instruction::Code opcode = mir->dalvikInsn.opcode;
+  return GetDataFlowAttributes(opcode);
+}
+
 // TODO: use a configurable base prefix, and adjust callers to supply pass name.
 /* Dump the CFG into a DOT graph */
 void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suffix) {
@@ -765,7 +798,7 @@
 
   for (idx = 0; idx < num_blocks; idx++) {
     int block_idx = all_blocks ? idx : dfs_order_->Get(idx);
-    BasicBlock *bb = GetBasicBlock(block_idx);
+    BasicBlock* bb = GetBasicBlock(block_idx);
     if (bb == NULL) continue;
     if (bb->block_type == kDead) continue;
     if (bb->block_type == kEntryBlock) {
@@ -775,18 +808,40 @@
     } else if (bb->block_type == kDalvikByteCode) {
       fprintf(file, "  block%04x_%d [shape=record,label = \"{ \\\n",
               bb->start_offset, bb->id);
-      const MIR *mir;
+      const MIR* mir;
         fprintf(file, "    {block id %d\\l}%s\\\n", bb->id,
                 bb->first_mir_insn ? " | " : " ");
         for (mir = bb->first_mir_insn; mir; mir = mir->next) {
             int opcode = mir->dalvikInsn.opcode;
-            fprintf(file, "    {%04x %s %s %s\\l}%s\\\n", mir->offset,
-                    mir->ssa_rep ? GetDalvikDisassembly(mir) :
-                    (opcode < kMirOpFirst) ?  Instruction::Name(mir->dalvikInsn.opcode) :
-                    extended_mir_op_names_[opcode - kMirOpFirst],
-                    (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0 ? " no_rangecheck" : " ",
-                    (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0 ? " no_nullcheck" : " ",
-                    mir->next ? " | " : " ");
+            if (opcode > kMirOpSelect && opcode < kMirOpLast) {
+              if (opcode == kMirOpConstVector) {
+                fprintf(file, "    {%04x %s %d %d %d %d %d %d\\l}%s\\\n", mir->offset,
+                        extended_mir_op_names_[kMirOpConstVector - kMirOpFirst],
+                        mir->dalvikInsn.vA,
+                        mir->dalvikInsn.vB,
+                        mir->dalvikInsn.arg[0],
+                        mir->dalvikInsn.arg[1],
+                        mir->dalvikInsn.arg[2],
+                        mir->dalvikInsn.arg[3],
+                        mir->next ? " | " : " ");
+              } else {
+                fprintf(file, "    {%04x %s %d %d %d\\l}%s\\\n", mir->offset,
+                        extended_mir_op_names_[opcode - kMirOpFirst],
+                        mir->dalvikInsn.vA,
+                        mir->dalvikInsn.vB,
+                        mir->dalvikInsn.vC,
+                        mir->next ? " | " : " ");
+              }
+            } else {
+              fprintf(file, "    {%04x %s %s %s\\l}%s\\\n", mir->offset,
+                      mir->ssa_rep ? GetDalvikDisassembly(mir) :
+                      (opcode < kMirOpFirst) ?
+                        Instruction::Name(mir->dalvikInsn.opcode) :
+                        extended_mir_op_names_[opcode - kMirOpFirst],
+                      (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0 ? " no_rangecheck" : " ",
+                      (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0 ? " no_nullcheck" : " ",
+                      mir->next ? " | " : " ");
+            }
         }
         fprintf(file, "  }\"];\n\n");
     } else if (bb->block_type == kExceptionHandling) {
@@ -815,13 +870,13 @@
               bb->start_offset, bb->id,
               (bb->successor_block_list_type == kCatch) ?  "Mrecord" : "record");
       GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bb->successor_blocks);
-      SuccessorBlockInfo *successor_block_info = iterator.Next();
+      SuccessorBlockInfo* successor_block_info = iterator.Next();
 
       int succ_id = 0;
       while (true) {
         if (successor_block_info == NULL) break;
 
-        BasicBlock *dest_block = GetBasicBlock(successor_block_info->block);
+        BasicBlock* dest_block = GetBasicBlock(successor_block_info->block);
         SuccessorBlockInfo *next_successor_block_info = iterator.Next();
 
         fprintf(file, "    {<f%d> %04x: %04x\\l}%s\\\n",
@@ -843,7 +898,7 @@
 
       succ_id = 0;
       while (true) {
-        SuccessorBlockInfo *successor_block_info = iter.Next();
+        SuccessorBlockInfo* successor_block_info = iter.Next();
         if (successor_block_info == NULL) break;
 
         BasicBlock* dest_block = GetBasicBlock(successor_block_info->block);
@@ -870,40 +925,171 @@
   fclose(file);
 }
 
-/* Insert an MIR instruction to the end of a basic block */
+/* Insert an MIR instruction to the end of a basic block. */
 void BasicBlock::AppendMIR(MIR* mir) {
-  if (first_mir_insn == nullptr) {
-    DCHECK(last_mir_insn == nullptr);
-    last_mir_insn = first_mir_insn = mir;
-    mir->next = nullptr;
-  } else {
-    last_mir_insn->next = mir;
-    mir->next = nullptr;
-    last_mir_insn = mir;
+  // Insert it after the last MIR.
+  InsertMIRListAfter(last_mir_insn, mir, mir);
+}
+
+void BasicBlock::AppendMIRList(MIR* first_list_mir, MIR* last_list_mir) {
+  // Insert it after the last MIR.
+  InsertMIRListAfter(last_mir_insn, first_list_mir, last_list_mir);
+}
+
+void BasicBlock::AppendMIRList(const std::vector<MIR*>& insns) {
+  for (std::vector<MIR*>::const_iterator it = insns.begin(); it != insns.end(); it++) {
+    MIR* new_mir = *it;
+
+    // Append each MIR in turn.
+    InsertMIRListAfter(last_mir_insn, new_mir, new_mir);
   }
 }
 
-/* Insert an MIR instruction to the head of a basic block */
-void BasicBlock::PrependMIR(MIR* mir) {
-  if (first_mir_insn == nullptr) {
-    DCHECK(last_mir_insn == nullptr);
-    last_mir_insn = first_mir_insn = mir;
-    mir->next = nullptr;
-  } else {
-    mir->next = first_mir_insn;
-    first_mir_insn = mir;
-  }
-}
-
-/* Insert a MIR instruction after the specified MIR */
+/* Insert a MIR instruction after the specified MIR. */
 void BasicBlock::InsertMIRAfter(MIR* current_mir, MIR* new_mir) {
-  new_mir->next = current_mir->next;
-  current_mir->next = new_mir;
+  InsertMIRListAfter(current_mir, new_mir, new_mir);
+}
 
-  if (last_mir_insn == current_mir) {
-    /* Is the last MIR in the block */
-    last_mir_insn = new_mir;
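+/* Insert the MIR list [first_list_mir..last_list_mir] after insert_after; a null insert_after
+   means the block is assumed to be empty. */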
+void BasicBlock::InsertMIRListAfter(MIR* insert_after, MIR* first_list_mir, MIR* last_list_mir) {
+  // If no MIR, we are done.
+  if (first_list_mir == nullptr || last_list_mir == nullptr) {
+    return;
   }
+
+  // If insert_after is null, assume BB is empty.
+  if (insert_after == nullptr) {
+    first_mir_insn = first_list_mir;
+    last_mir_insn = last_list_mir;
+    last_list_mir->next = nullptr;
+  } else {
+    MIR* after_list = insert_after->next;
+    insert_after->next = first_list_mir;
+    last_list_mir->next = after_list;
+    if (after_list == nullptr) {
+      last_mir_insn = last_list_mir;
+    }
+  }
+
+  // Set this BB to be the basic block of the MIRs.
+  MIR* last = last_list_mir->next;
+  for (MIR* mir = first_list_mir; mir != last; mir = mir->next) {
+    mir->bb = id;
+  }
+}
+
+/* Insert an MIR instruction to the head of a basic block. */
+void BasicBlock::PrependMIR(MIR* mir) {
+  InsertMIRListBefore(first_mir_insn, mir, mir);
+}
+
+void BasicBlock::PrependMIRList(MIR* first_list_mir, MIR* last_list_mir) {
+  // Insert it before the first MIR.
+  InsertMIRListBefore(first_mir_insn, first_list_mir, last_list_mir);
+}
+
+void BasicBlock::PrependMIRList(const std::vector<MIR*>& to_add) {
+  for (std::vector<MIR*>::const_iterator it = to_add.begin(); it != to_add.end(); it++) {
+    MIR* mir = *it;
+
+    InsertMIRListBefore(first_mir_insn, mir, mir);
+  }
+}
+
+/* Insert a MIR instruction before the specified MIR. */
+void BasicBlock::InsertMIRBefore(MIR* current_mir, MIR* new_mir) {
+  // Insert as a single element list.
+  return InsertMIRListBefore(current_mir, new_mir, new_mir);
+}
+
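+/* Find the MIR that immediately precedes 'mir' in this block, or return nullptr if there is none. */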
+MIR* BasicBlock::FindPreviousMIR(MIR* mir) {
+  MIR* current = first_mir_insn;
+
+  while (current != nullptr) {
+    MIR* next = current->next;
+
+    if (next == mir) {
+      return current;
+    }
+
+    current = next;
+  }
+
+  return nullptr;
+}
+
+void BasicBlock::InsertMIRListBefore(MIR* insert_before, MIR* first_list_mir, MIR* last_list_mir) {
+  // If no MIR, we are done.
+  if (first_list_mir == nullptr || last_list_mir == nullptr) {
+    return;
+  }
+
+  // If insert_before is null, assume BB is empty.
+  if (insert_before == nullptr) {
+    first_mir_insn = first_list_mir;
+    last_mir_insn = last_list_mir;
+    last_list_mir->next = nullptr;
+  } else {
+    if (first_mir_insn == insert_before) {
+      last_list_mir->next = first_mir_insn;
+      first_mir_insn = first_list_mir;
+    } else {
+      // Find the preceding MIR.
+      MIR* before_list = FindPreviousMIR(insert_before);
+      DCHECK(before_list != nullptr);
+      before_list->next = first_list_mir;
+      last_list_mir->next = insert_before;
+    }
+  }
+
+  // Set this BB to be the basic block of the MIRs.
+  for (MIR* mir = first_list_mir; mir != last_list_mir->next; mir = mir->next) {
+    mir->bb = id;
+  }
+}
+
+bool BasicBlock::RemoveMIR(MIR* mir) {
+  // Remove as a single element list.
+  return RemoveMIRList(mir, mir);
+}
+
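+/* Unlink the MIR range [first_list_mir..last_list_mir] from this block; returns false if the
+   start of the range cannot be found. */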
+bool BasicBlock::RemoveMIRList(MIR* first_list_mir, MIR* last_list_mir) {
+  if (first_list_mir == nullptr) {
+    return false;
+  }
+
+  // Try to find the MIR.
+  MIR* before_list = nullptr;
+  MIR* after_list = nullptr;
+
+  // If we are removing from the beginning of the MIR list.
+  if (first_mir_insn == first_list_mir) {
+    before_list = nullptr;
+  } else {
+    before_list = FindPreviousMIR(first_list_mir);
+    if (before_list == nullptr) {
+      // We did not find the mir.
+      return false;
+    }
+  }
+
+  // Remove the BB information (including for last_list_mir) and also find the after_list.
+  for (MIR* mir = first_list_mir; mir != last_list_mir->next; mir = mir->next) {
+    mir->bb = NullBasicBlockId;
+  }
+
+  after_list = last_list_mir->next;
+
+  // If there is nothing before the list, after_list becomes the first MIR;
+  // otherwise unlink the removed range from before_list.
+  if (before_list == nullptr) {
+    first_mir_insn = after_list;
+  } else {
+    before_list->next = after_list;
+  }
+
+  // If there is nothing after the list, before_list is the last MIR.
+  if (after_list == nullptr) {
+    last_mir_insn = before_list;
+  }
+
+  return true;
 }
 
 MIR* BasicBlock::GetNextUnconditionalMir(MIRGraph* mir_graph, MIR* current) {
@@ -924,14 +1110,14 @@
 }
 
 char* MIRGraph::GetDalvikDisassembly(const MIR* mir) {
-  DecodedInstruction insn = mir->dalvikInsn;
+  MIR::DecodedInstruction insn = mir->dalvikInsn;
   std::string str;
   int flags = 0;
   int opcode = insn.opcode;
   char* ret;
   bool nop = false;
   SSARepresentation* ssa_rep = mir->ssa_rep;
-  Instruction::Format dalvik_format = Instruction::k10x;  // Default to no-operand format
+  Instruction::Format dalvik_format = Instruction::k10x;  // Default to no-operand format.
   int defs = (ssa_rep != NULL) ? ssa_rep->num_defs : 0;
   int uses = (ssa_rep != NULL) ? ssa_rep->num_uses : 0;
 
@@ -939,7 +1125,7 @@
   if ((opcode == kMirOpCheck) || (opcode == kMirOpCheckPart2)) {
     str.append(extended_mir_op_names_[opcode - kMirOpFirst]);
     str.append(": ");
-    // Recover the original Dex instruction
+    // Recover the original Dex instruction.
     insn = mir->meta.throw_insn->dalvikInsn;
     ssa_rep = mir->meta.throw_insn->ssa_rep;
     defs = ssa_rep->num_defs;
@@ -998,7 +1184,7 @@
     str.append(StringPrintf(" 0x%x (%c%x)", mir->offset + offset,
                             offset > 0 ? '+' : '-', offset > 0 ? offset : -offset));
   } else {
-    // For invokes-style formats, treat wide regs as a pair of singles
+    // For invokes-style formats, treat wide regs as a pair of singles.
     bool show_singles = ((dalvik_format == Instruction::k35c) ||
                          (dalvik_format == Instruction::k3rc));
     if (defs != 0) {
@@ -1019,28 +1205,28 @@
       }
     }
     switch (dalvik_format) {
-      case Instruction::k11n:  // Add one immediate from vB
+      case Instruction::k11n:  // Add one immediate from vB.
       case Instruction::k21s:
       case Instruction::k31i:
       case Instruction::k21h:
         str.append(StringPrintf(", #%d", insn.vB));
         break;
-      case Instruction::k51l:  // Add one wide immediate
+      case Instruction::k51l:  // Add one wide immediate.
         str.append(StringPrintf(", #%" PRId64, insn.vB_wide));
         break;
-      case Instruction::k21c:  // One register, one string/type/method index
+      case Instruction::k21c:  // One register, one string/type/method index.
       case Instruction::k31c:
         str.append(StringPrintf(", index #%d", insn.vB));
         break;
-      case Instruction::k22c:  // Two registers, one string/type/method index
+      case Instruction::k22c:  // Two registers, one string/type/method index.
         str.append(StringPrintf(", index #%d", insn.vC));
         break;
-      case Instruction::k22s:  // Add one immediate from vC
+      case Instruction::k22s:  // Add one immediate from vC.
       case Instruction::k22b:
         str.append(StringPrintf(", #%d", insn.vC));
         break;
       default: {
-        // Nothing left to print
+        // Nothing left to print.
       }
     }
   }
@@ -1074,7 +1260,7 @@
 // Similar to GetSSAName, but if ssa name represents an immediate show that as well.
 std::string MIRGraph::GetSSANameWithConst(int ssa_reg, bool singles_only) {
   if (reg_location_ == NULL) {
-    // Pre-SSA - just use the standard name
+    // Pre-SSA - just use the standard name.
     return GetSSAName(ssa_reg);
   }
   if (IsConst(reg_location_[ssa_reg])) {
@@ -1186,10 +1372,16 @@
   return info;
 }
 
+// Allocate a new MIR.
+MIR* MIRGraph::NewMIR() {
+  MIR* mir = new (arena_) MIR();
+  return mir;
+}
+
 // Allocate a new basic block.
 BasicBlock* MIRGraph::NewMemBB(BBType block_type, int block_id) {
-  BasicBlock* bb = static_cast<BasicBlock*>(arena_->Alloc(sizeof(BasicBlock),
-                                                          kArenaAllocBB));
+  BasicBlock* bb = new (arena_) BasicBlock();
+
   bb->block_type = block_type;
   bb->id = block_id;
   // TUNING: better estimate of the exit block predecessors?
@@ -1207,18 +1399,24 @@
 }
 
 void MIRGraph::InitializeMethodUses() {
-  // The gate starts by initializing the use counts
+  // The gate starts by initializing the use counts.
   int num_ssa_regs = GetNumSSARegs();
   use_counts_.Resize(num_ssa_regs + 32);
   raw_use_counts_.Resize(num_ssa_regs + 32);
-  // Initialize list
+  // Initialize list.
   for (int i = 0; i < num_ssa_regs; i++) {
     use_counts_.Insert(0);
     raw_use_counts_.Insert(0);
   }
 }
 
-void MIRGraph::InitializeSSATransformation() {
+void MIRGraph::SSATransformationStart() {
+  DCHECK(temp_scoped_alloc_.get() == nullptr);
+  temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
+  temp_bit_vector_size_ = cu_->num_dalvik_registers;
+  temp_bit_vector_ = new (temp_scoped_alloc_.get()) ArenaBitVector(
+      temp_scoped_alloc_.get(), temp_bit_vector_size_, false, kBitMapRegisterV);
+
   /* Compute the DFS order */
   ComputeDFSOrders();
 
@@ -1239,4 +1437,477 @@
   DoDFSPreOrderSSARename(GetEntryBlock());
 }
 
+void MIRGraph::SSATransformationEnd() {
+  // Verify the dataflow information after the pass.
+  if (cu_->enable_debug & (1 << kDebugVerifyDataflow)) {
+    VerifyDataflow();
+  }
+
+  temp_bit_vector_size_ = 0u;
+  temp_bit_vector_ = nullptr;
+  DCHECK(temp_scoped_alloc_.get() != nullptr);
+  temp_scoped_alloc_.reset();
+}
+
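+/* Compute a topological order of the CFG (Kahn's algorithm): count incoming edges per block,
+   ignoring back edges (predecessors dominated by the block), and repeatedly emit blocks whose
+   pending predecessor count has dropped to zero. */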
+void MIRGraph::ComputeTopologicalSortOrder() {
+  std::queue<BasicBlock *> q;
+  std::map<int, int> visited_cnt_values;
+
+  // Clear the nodes.
+  ClearAllVisitedFlags();
+
+  // Create the topological order if need be.
+  if (topological_order_ == nullptr) {
+    topological_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, 0);
+  }
+  topological_order_->Reset();
+
+  // Set up the visited_cnt_values map for all BBs. The default value for these counters is zero.
+  // Also fill the initial queue.
+  GrowableArray<BasicBlock*>::Iterator iterator(&block_list_);
+
+  while (true) {
+    BasicBlock* bb = iterator.Next();
+
+    if (bb == nullptr) {
+      break;
+    }
+
+    if (bb->hidden == true) {
+      continue;
+    }
+
+    visited_cnt_values[bb->id] = bb->predecessors->Size();
+
+    GrowableArray<BasicBlockId>::Iterator pred_iterator(bb->predecessors);
+    // To process loops we should not wait for dominators.
+    while (true) {
+      BasicBlock* pred_bb = GetBasicBlock(pred_iterator.Next());
+
+      if (pred_bb == nullptr) {
+        break;
+      }
+
+      if (pred_bb->dominators == nullptr || pred_bb->hidden == true) {
+        continue;
+      }
+
+      // Skip the backward branch.
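+      // A predecessor that is dominated by bb can only reach bb through a back edge,
+      // so it is not counted as a pending predecessor.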
+      if (pred_bb->dominators->IsBitSet(bb->id) != 0) {
+        visited_cnt_values[bb->id]--;
+      }
+    }
+
+    // Add the block to the initial queue if it has no pending predecessors (e.g. the entry block).
+    if (visited_cnt_values[bb->id] == 0) {
+      q.push(bb);
+    }
+  }
+
+  while (q.size() > 0) {
+    // Get top.
+    BasicBlock *bb = q.front();
+    q.pop();
+
+    DCHECK_EQ(bb->hidden, false);
+
+    if (bb->IsExceptionBlock() == true) {
+      continue;
+    }
+
+    // We've visited all the predecessors. So, we can visit bb.
+    if (bb->visited == false) {
+      bb->visited = true;
+
+      // Now add the basic block.
+      topological_order_->Insert(bb->id);
+
+      // Reduce the visited count for all successors and add to the queue those whose count drops to zero.
+      ChildBlockIterator succIter(bb, this);
+      BasicBlock *successor = succIter.Next();
+      while (successor != nullptr) {
+        // one more predecessor was visited.
+        visited_cnt_values[successor->id]--;
+
+        if (visited_cnt_values[successor->id] <= 0 &&
+            successor->visited == false && successor->hidden == false) {
+          q.push(successor);
+        }
+
+        // Take next successor.
+        successor = succIter.Next();
+      }
+    }
+  }
+}
+
+bool BasicBlock::IsExceptionBlock() const {
+  if (block_type == kExceptionHandling) {
+    return true;
+  }
+  return false;
+}
+
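+/* Iterate over a block's children: the fall-through block first, then the taken block,
+   then any blocks on the successor list. */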
+ChildBlockIterator::ChildBlockIterator(BasicBlock* bb, MIRGraph* mir_graph)
+    : basic_block_(bb), mir_graph_(mir_graph), visited_fallthrough_(false),
+      visited_taken_(false), have_successors_(false) {
+  // Check if we actually do have successors.
+  if (basic_block_ != 0 && basic_block_->successor_block_list_type != kNotUsed) {
+    have_successors_ = true;
+    successor_iter_.Reset(basic_block_->successor_blocks);
+  }
+}
+
+BasicBlock* ChildBlockIterator::Next() {
+  // We check if we have a basic block. If we don't we cannot get next child.
+  if (basic_block_ == nullptr) {
+    return nullptr;
+  }
+
+  // If we haven't visited fallthrough, return that.
+  if (visited_fallthrough_ == false) {
+    visited_fallthrough_ = true;
+
+    BasicBlock* result = mir_graph_->GetBasicBlock(basic_block_->fall_through);
+    if (result != nullptr) {
+      return result;
+    }
+  }
+
+  // If we haven't visited taken, return that.
+  if (visited_taken_ == false) {
+    visited_taken_ = true;
+
+    BasicBlock* result = mir_graph_->GetBasicBlock(basic_block_->taken);
+    if (result != nullptr) {
+      return result;
+    }
+  }
+
+  // We visited both taken and fallthrough. Now check if we have successors we need to visit.
+  if (have_successors_ == true) {
+    // Get information about next successor block.
+    SuccessorBlockInfo* successor_block_info = successor_iter_.Next();
+
+    // If we don't have anymore successors, return nullptr.
+    if (successor_block_info != nullptr) {
+      return mir_graph_->GetBasicBlock(successor_block_info->block);
+    }
+  }
+
+  // We do not have anything.
+  return nullptr;
+}
+
+BasicBlock* BasicBlock::Copy(CompilationUnit* c_unit) {
+  MIRGraph* mir_graph = c_unit->mir_graph.get();
+  return Copy(mir_graph);
+}
+
+BasicBlock* BasicBlock::Copy(MIRGraph* mir_graph) {
+  BasicBlock* result_bb = mir_graph->CreateNewBB(block_type);
+
+  // We don't do a memcpy style copy here because it would lead to a lot of things
+  // to clean up. Let us do it by hand instead.
+  // Copy in taken and fallthrough.
+  result_bb->fall_through = fall_through;
+  result_bb->taken = taken;
+
+  // Copy successor links if needed.
+  ArenaAllocator* arena = mir_graph->GetArena();
+
+  result_bb->successor_block_list_type = successor_block_list_type;
+  if (result_bb->successor_block_list_type != kNotUsed) {
+    size_t size = successor_blocks->Size();
+    result_bb->successor_blocks =
+        new (arena) GrowableArray<SuccessorBlockInfo*>(arena, size, kGrowableArraySuccessorBlocks);
+    GrowableArray<SuccessorBlockInfo*>::Iterator iterator(successor_blocks);
+    while (true) {
+      SuccessorBlockInfo* sbi_old = iterator.Next();
+      if (sbi_old == nullptr) {
+        break;
+      }
+      SuccessorBlockInfo* sbi_new = static_cast<SuccessorBlockInfo*>(
+          arena->Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor));
+      memcpy(sbi_new, sbi_old, sizeof(SuccessorBlockInfo));
+      result_bb->successor_blocks->Insert(sbi_new);
+    }
+  }
+
+  // Copy offset, method.
+  result_bb->start_offset = start_offset;
+
+  // Now copy instructions.
+  for (MIR* mir = first_mir_insn; mir != 0; mir = mir->next) {
+    // Get a copy first.
+    MIR* copy = mir->Copy(mir_graph);
+
+    // Append it.
+    result_bb->AppendMIR(copy);
+  }
+
+  return result_bb;
+}
+
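+/* Make a shallow copy of a MIR, clearing its list, block and SSA links. */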
+MIR* MIR::Copy(MIRGraph* mir_graph) {
+  MIR* res = mir_graph->NewMIR();
+  *res = *this;
+
+  // Remove links
+  res->next = nullptr;
+  res->bb = NullBasicBlockId;
+  res->ssa_rep = nullptr;
+
+  return res;
+}
+
+MIR* MIR::Copy(CompilationUnit* c_unit) {
+  return Copy(c_unit->mir_graph.get());
+}
+
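+/* Return the index of the first use that is not the value being stored; the various put
+   opcodes skip one VR (two for wide puts). */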
+uint32_t SSARepresentation::GetStartUseIndex(Instruction::Code opcode) {
+  // Default result.
+  int res = 0;
+
+  // We are basically setting the iputs to their igets counterparts.
+  switch (opcode) {
+    case Instruction::IPUT:
+    case Instruction::IPUT_OBJECT:
+    case Instruction::IPUT_BOOLEAN:
+    case Instruction::IPUT_BYTE:
+    case Instruction::IPUT_CHAR:
+    case Instruction::IPUT_SHORT:
+    case Instruction::IPUT_QUICK:
+    case Instruction::IPUT_OBJECT_QUICK:
+    case Instruction::APUT:
+    case Instruction::APUT_OBJECT:
+    case Instruction::APUT_BOOLEAN:
+    case Instruction::APUT_BYTE:
+    case Instruction::APUT_CHAR:
+    case Instruction::APUT_SHORT:
+    case Instruction::SPUT:
+    case Instruction::SPUT_OBJECT:
+    case Instruction::SPUT_BOOLEAN:
+    case Instruction::SPUT_BYTE:
+    case Instruction::SPUT_CHAR:
+    case Instruction::SPUT_SHORT:
+      // Skip the VR containing what to store.
+      res = 1;
+      break;
+    case Instruction::IPUT_WIDE:
+    case Instruction::IPUT_WIDE_QUICK:
+    case Instruction::APUT_WIDE:
+    case Instruction::SPUT_WIDE:
+      // Skip the two VRs containing what to store.
+      res = 2;
+      break;
+    default:
+      // Do nothing in the general case.
+      break;
+  }
+
+  return res;
+}
+
+/**
+ * @brief Check whether the decoded instruction sets a constant and, if it does,
+ * provide more information about the constant being set.
+ * @param ptr_value Pointer to a 64-bit holder for the constant.
+ * @param wide Set by this function to indicate whether a wide constant is being set.
+ * @return Returns false if the decoded instruction does not represent a constant bytecode.
+ */
+bool MIR::DecodedInstruction::GetConstant(int64_t* ptr_value, bool* wide) const {
+  bool sets_const = true;
+  int64_t value = vB;
+
+  DCHECK(ptr_value != nullptr);
+  DCHECK(wide != nullptr);
+
+  switch (opcode) {
+    case Instruction::CONST_4:
+    case Instruction::CONST_16:
+    case Instruction::CONST:
+      *wide = false;
+      value <<= 32;      // In order to get the sign extend.
+      value >>= 32;
+      break;
+    case Instruction::CONST_HIGH16:
+      *wide = false;
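+      // The 16-bit literal forms the high half of a 32-bit value: shifting left by 48 and
+      // arithmetically right by 32 yields that 32-bit value sign-extended to 64 bits.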
+      value <<= 48;      // In order to get the sign extend.
+      value >>= 32;
+      break;
+    case Instruction::CONST_WIDE_16:
+    case Instruction::CONST_WIDE_32:
+      *wide = true;
+      value <<= 32;      // In order to get the sign extend.
+      value >>= 32;
+      break;
+    case Instruction::CONST_WIDE:
+      *wide = true;
+      value = vB_wide;
+      break;
+    case Instruction::CONST_WIDE_HIGH16:
+      *wide = true;
+      value <<= 48;      // The literal forms the top 16 bits of the 64-bit constant.
+      break;
+    default:
+      sets_const = false;
+      break;
+  }
+
+  if (sets_const) {
+    *ptr_value = value;
+  }
+
+  return sets_const;
+}
+
+void BasicBlock::ResetOptimizationFlags(uint16_t reset_flags) {
+  // Reset flags for all MIRs in bb.
+  for (MIR* mir = first_mir_insn; mir != NULL; mir = mir->next) {
+    mir->optimization_flags &= (~reset_flags);
+  }
+}
+
+void BasicBlock::Hide(CompilationUnit* c_unit) {
+  // First lets make it a dalvik bytecode block so it doesn't have any special meaning.
+  block_type = kDalvikByteCode;
+
+  // Mark it as hidden.
+  hidden = true;
+
+  // Detach it from its MIRs so we don't generate code for them. Also detached MIRs
+  // are updated to know that they no longer have a parent.
+  for (MIR* mir = first_mir_insn; mir != nullptr; mir = mir->next) {
+    mir->bb = NullBasicBlockId;
+  }
+  first_mir_insn = nullptr;
+  last_mir_insn = nullptr;
+
+  GrowableArray<BasicBlockId>::Iterator iterator(predecessors);
+
+  MIRGraph* mir_graph = c_unit->mir_graph.get();
+  while (true) {
+    BasicBlock* pred_bb = mir_graph->GetBasicBlock(iterator.Next());
+    if (pred_bb == nullptr) {
+      break;
+    }
+
+    // Sadly we have to go through the children by hand here.
+    pred_bb->ReplaceChild(id, NullBasicBlockId);
+  }
+
+  // Iterate through children of bb we are hiding.
+  ChildBlockIterator successorChildIter(this, mir_graph);
+
+  for (BasicBlock* childPtr = successorChildIter.Next(); childPtr != 0; childPtr = successorChildIter.Next()) {
+    // Remove the hidden block from the child's predecessor list.
+    childPtr->predecessors->Delete(id);
+  }
+}
+
+bool BasicBlock::IsSSALiveOut(const CompilationUnit* c_unit, int ssa_reg) {
+  // In order to determine if the ssa reg is live out, we scan all the MIRs. We remember
+  // the last SSA number of the same dalvik register. At the end, if it is different from ssa_reg,
+  // then it is not live out of this BB.
+  int dalvik_reg = c_unit->mir_graph->SRegToVReg(ssa_reg);
+
+  int last_ssa_reg = -1;
+
+  // Walk through the MIRs, tracking the last def of the register.
+  for (MIR* mir = first_mir_insn; mir != nullptr; mir = mir->next) {
+    // Get ssa rep.
+    SSARepresentation *ssa_rep = mir->ssa_rep;
+
+    // Go through the defines for this MIR.
+    for (int i = 0; i < ssa_rep->num_defs; i++) {
+      DCHECK(ssa_rep->defs != nullptr);
+
+      // Get the ssa reg.
+      int def_ssa_reg = ssa_rep->defs[i];
+
+      // Get dalvik reg.
+      int def_dalvik_reg = c_unit->mir_graph->SRegToVReg(def_ssa_reg);
+
+      // Compare dalvik regs.
+      if (dalvik_reg == def_dalvik_reg) {
+        // We found a def of the register that we are being asked about.
+        // Remember it.
+        last_ssa_reg = def_ssa_reg;
+      }
+    }
+  }
+
+  if (last_ssa_reg == -1) {
+    // We couldn't find any definition of the requested register in this block.
+    // Be conservative and report it as live out.
+    return true;
+  }
+
+  // We found at least one definition; ssa_reg is live out only if it was the last one.
+  return (ssa_reg == last_ssa_reg);
+}
+
+bool BasicBlock::ReplaceChild(BasicBlockId old_bb, BasicBlockId new_bb) {
+  // We need to check taken, fall_through, and successor_blocks to replace.
+  bool found = false;
+  if (taken == old_bb) {
+    taken = new_bb;
+    found = true;
+  }
+
+  if (fall_through == old_bb) {
+    fall_through = new_bb;
+    found = true;
+  }
+
+  if (successor_block_list_type != kNotUsed) {
+    GrowableArray<SuccessorBlockInfo*>::Iterator iterator(successor_blocks);
+    while (true) {
+      SuccessorBlockInfo* successor_block_info = iterator.Next();
+      if (successor_block_info == nullptr) {
+        break;
+      }
+      if (successor_block_info->block == old_bb) {
+        successor_block_info->block = new_bb;
+        found = true;
+      }
+    }
+  }
+
+  return found;
+}
+
+void BasicBlock::UpdatePredecessor(BasicBlockId old_parent, BasicBlockId new_parent) {
+  GrowableArray<BasicBlockId>::Iterator iterator(predecessors);
+  bool found = false;
+
+  while (true) {
+    BasicBlockId pred_bb_id = iterator.Next();
+
+    if (pred_bb_id == NullBasicBlockId) {
+      break;
+    }
+
+    if (pred_bb_id == old_parent) {
+      size_t idx = iterator.GetIndex() - 1;
+      predecessors->Put(idx, new_parent);
+      found = true;
+      break;
+    }
+  }
+
+  // If not found, add it.
+  if (found == false) {
+    predecessors->Insert(new_parent);
+  }
+}
+
+// Create a new basic block whose block_id is the current num_blocks_, which is then
+// post-incremented.
+BasicBlock* MIRGraph::CreateNewBB(BBType block_type) {
+  BasicBlock* res = NewMemBB(block_type, num_blocks_++);
+  block_list_.Insert(res);
+  return res;
+}
+
 }  // namespace art
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 2c125f6..53a997e 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -242,6 +242,8 @@
   bool* fp_use;
   int32_t* defs;
   bool* fp_def;
+
+  static uint32_t GetStartUseIndex(Instruction::Code opcode);
 };
 
 /*
@@ -254,11 +256,72 @@
    * additional fields on as-needed basis.  Question: how to support MIR Pseudo-ops; probably
    * need to carry aux data pointer.
    */
-  DecodedInstruction dalvikInsn;
-  uint16_t width;                 // Note: width can include switch table or fill array data.
+  struct DecodedInstruction {
+    uint32_t vA;
+    uint32_t vB;
+    uint64_t vB_wide;        /* for k51l */
+    uint32_t vC;
+    uint32_t arg[5];         /* vC/D/E/F/G in invoke or filled-new-array */
+    Instruction::Code opcode;
+
+    explicit DecodedInstruction():vA(0), vB(0), vB_wide(0), vC(0), opcode(Instruction::NOP) {
+    }
+
+    /*
+     * Given a decoded instruction representing a const bytecode, it updates
+     * the out arguments with proper values as dictated by the constant bytecode.
+     */
+    bool GetConstant(int64_t* ptr_value, bool* wide) const;
+
+    bool IsStore() const {
+      return ((Instruction::FlagsOf(opcode) & Instruction::kStore) == Instruction::kStore);
+    }
+
+    bool IsLoad() const {
+      return ((Instruction::FlagsOf(opcode) & Instruction::kLoad) == Instruction::kLoad);
+    }
+
+    bool IsConditionalBranch() const {
+      return (Instruction::FlagsOf(opcode) == (Instruction::kContinue | Instruction::kBranch));
+    }
+
+    /**
+     * @brief Is the register C component of the decoded instruction a constant?
+     */
+    bool IsCFieldOrConstant() const {
+      return ((Instruction::FlagsOf(opcode) & Instruction::kRegCFieldOrConstant) == Instruction::kRegCFieldOrConstant);
+    }
+
+    /**
+     * @brief Is the vB component of the decoded instruction a field index or a constant?
+     */
+    bool IsBFieldOrConstant() const {
+      return ((Instruction::FlagsOf(opcode) & Instruction::kRegBFieldOrConstant) == Instruction::kRegBFieldOrConstant);
+    }
+
+    bool IsCast() const {
+      return ((Instruction::FlagsOf(opcode) & Instruction::kCast) == Instruction::kCast);
+    }
+
+    /**
+     * @brief Does the instruction clobber memory?
+     * @details Clobber means that the instruction changes memory in a non-local way, so any
+     *          assumption about memory aliasing or memory contents must be discarded when
+     *          crossing such an instruction.
+     */
+    bool Clobbers() const {
+      return ((Instruction::FlagsOf(opcode) & Instruction::kClobber) == Instruction::kClobber);
+    }
+
+    bool IsLinear() const {
+      return (Instruction::FlagsOf(opcode) & (Instruction::kAdd | Instruction::kSubtract)) != 0;
+    }
+  } dalvikInsn;
+
   NarrowDexOffset offset;         // Offset of the instruction in code units.
   uint16_t optimization_flags;
   int16_t m_unit_index;           // From which method was this MIR included
+  BasicBlockId bb;
   MIR* next;
   SSARepresentation* ssa_rep;
   union {
@@ -277,6 +340,23 @@
     // INVOKE data index, points to MIRGraph::method_lowering_infos_.
     uint32_t method_lowering_info;
   } meta;
+
+  explicit MIR():offset(0), optimization_flags(0), m_unit_index(0), bb(NullBasicBlockId),
+                 next(nullptr), ssa_rep(nullptr) {
+    memset(&meta, 0, sizeof(meta));
+  }
+
+  uint32_t GetStartUseIndex() const {
+    return SSARepresentation::GetStartUseIndex(dalvikInsn.opcode);
+  }
+
+  MIR* Copy(CompilationUnit *c_unit);
+  MIR* Copy(MIRGraph* mir_graph);
+
+  static void* operator new(size_t size, ArenaAllocator* arena) {
+    return arena->Alloc(sizeof(MIR), kArenaAllocMIR);
+  }
+  static void operator delete(void* p) {}  // Nop.
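+
+  // MIRs are arena-allocated, e.g. "MIR* mir = new (arena) MIR();"; the no-op operator delete
+  // leaves deallocation entirely to the arena.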
 };
 
 struct SuccessorBlockInfo;
@@ -309,8 +389,49 @@
   GrowableArray<SuccessorBlockInfo*>* successor_blocks;
 
   void AppendMIR(MIR* mir);
+  void AppendMIRList(MIR* first_list_mir, MIR* last_list_mir);
+  void AppendMIRList(const std::vector<MIR*>& insns);
   void PrependMIR(MIR* mir);
+  void PrependMIRList(MIR* first_list_mir, MIR* last_list_mir);
+  void PrependMIRList(const std::vector<MIR*>& to_add);
   void InsertMIRAfter(MIR* current_mir, MIR* new_mir);
+  void InsertMIRListAfter(MIR* insert_after, MIR* first_list_mir, MIR* last_list_mir);
+  MIR* FindPreviousMIR(MIR* mir);
+  void InsertMIRBefore(MIR* insert_before, MIR* list);
+  void InsertMIRListBefore(MIR* insert_before, MIR* first_list_mir, MIR* last_list_mir);
+  bool RemoveMIR(MIR* mir);
+  bool RemoveMIRList(MIR* first_list_mir, MIR* last_list_mir);
+
+  BasicBlock* Copy(CompilationUnit* c_unit);
+  BasicBlock* Copy(MIRGraph* mir_graph);
+
+  /**
+   * @brief Reset the optimization_flags field of each MIR.
+   */
+  void ResetOptimizationFlags(uint16_t reset_flags);
+
+  /**
+   * @brief Hide the BasicBlock.
+   * @details Set it to kDalvikByteCode, set hidden to true, remove all MIRs,
+   *          remove itself from any predecessor edges, remove itself from any
+   *          child's predecessor growable array.
+   */
+  void Hide(CompilationUnit* c_unit);
+
+  /**
+   * @brief Is ssa_reg the last SSA definition of that VR in the block?
+   */
+  bool IsSSALiveOut(const CompilationUnit* c_unit, int ssa_reg);
+
+  /**
+   * @brief Replace the edge going to old_bb to now go towards new_bb.
+   */
+  bool ReplaceChild(BasicBlockId old_bb, BasicBlockId new_bb);
+
+  /**
+   * @brief Update the predecessor growable array from old_pred to new_pred.
+   */
+  void UpdatePredecessor(BasicBlockId old_pred, BasicBlockId new_pred);
 
   /**
    * @brief Used to obtain the next MIR that follows unconditionally.
@@ -321,11 +442,17 @@
    * @return Returns the following MIR if one can be found.
    */
   MIR* GetNextUnconditionalMir(MIRGraph* mir_graph, MIR* current);
+  bool IsExceptionBlock() const;
+
+  static void* operator new(size_t size, ArenaAllocator* arena) {
+    return arena->Alloc(sizeof(BasicBlock), kArenaAllocBB);
+  }
+  static void operator delete(void* p) {}  // Nop.
 };
 
 /*
  * The "blocks" field in "successor_block_list" points to an array of elements with the type
- * "SuccessorBlockInfo".  For catch blocks, key is type index for the exception.  For swtich
+ * "SuccessorBlockInfo".  For catch blocks, key is type index for the exception.  For switch
  * blocks, key is the case value.
  */
 struct SuccessorBlockInfo {
@@ -333,6 +460,29 @@
   int key;
 };
 
+/**
+ * @class ChildBlockIterator
+ * @brief Enable an easy iteration of the children.
+ */
+class ChildBlockIterator {
+ public:
+  /**
+   * @brief Constructs a child iterator.
+   * @param bb The basic whose children we need to iterate through.
+   * @param mir_graph The MIRGraph used to get the basic block during iteration.
+   */
+  ChildBlockIterator(BasicBlock* bb, MIRGraph* mir_graph);
+  BasicBlock* Next();
+
+ private:
+  BasicBlock* basic_block_;
+  MIRGraph* mir_graph_;
+  bool visited_fallthrough_;
+  bool visited_taken_;
+  bool have_successors_;
+  GrowableArray<SuccessorBlockInfo*>::Iterator successor_iter_;
+};
+
 /*
  * Whereas a SSA name describes a definition of a Dalvik vreg, the RegLocation describes
  * the type of an SSA name (and, can also be used by code generators to record where the
@@ -360,13 +510,10 @@
   unsigned ref:1;       // Something GC cares about.
   unsigned high_word:1;  // High word of pair?
   unsigned home:1;      // Does this represent the home location?
-  VectorLengthType vec_len:3;  // TODO: remove.  Is this value in a vector register, and how big is it?
   RegStorage reg;       // Encoded physical registers.
   int16_t s_reg_low;    // SSA name for low Dalvik word.
   int16_t orig_sreg;    // TODO: remove after Bitcode gen complete
                         // and consolidate usage w/ s_reg_low.
-
-  bool IsVectorScalar() const { return vec_len == kVectorLength4 || vec_len == kVectorLength8;}
 };
 
 /*
@@ -392,8 +539,8 @@
 };
 
 
-const RegLocation bad_loc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0, kVectorNotUsed,
-                             RegStorage(RegStorage::kInvalid), INVALID_SREG, INVALID_SREG};
+const RegLocation bad_loc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0, RegStorage(), INVALID_SREG,
+                             INVALID_SREG};
 
 class MIRGraph {
  public:
@@ -545,6 +692,10 @@
 
   void BasicBlockOptimization();
 
+  GrowableArray<BasicBlockId>* GetTopologicalSortOrder() {
+    return topological_order_;
+  }
+
   bool IsConst(int32_t s_reg) const {
     return is_constant_v_->IsBitSet(s_reg);
   }
@@ -808,9 +959,12 @@
   void DumpMIRGraph();
   CallInfo* NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, bool is_range);
   BasicBlock* NewMemBB(BBType block_type, int block_id);
+  MIR* NewMIR();
   MIR* AdvanceMIR(BasicBlock** p_bb, MIR* mir);
   BasicBlock* NextDominatedBlock(BasicBlock* bb);
   bool LayoutBlocks(BasicBlock* bb);
+  void ComputeTopologicalSortOrder();
+  BasicBlock* CreateNewBB(BBType block_type);
 
   bool InlineCallsGate();
   void InlineCallsStart();
@@ -830,7 +984,7 @@
   /**
    * @brief Perform the initial preparation for the SSA Transformation.
    */
-  void InitializeSSATransformation();
+  void SSATransformationStart();
 
   /**
    * @brief Insert a the operands for the Phi nodes.
@@ -840,6 +994,11 @@
   bool InsertPhiNodeOperands(BasicBlock* bb);
 
   /**
+   * @brief Perform the cleanup after the SSA Transformation.
+   */
+  void SSATransformationEnd();
+
+  /**
    * @brief Perform constant propagation on a BasicBlock.
    * @param bb the considered BasicBlock.
    */
@@ -851,6 +1010,9 @@
    */
   void CountUses(struct BasicBlock* bb);
 
+  static uint64_t GetDataFlowAttributes(Instruction::Code opcode);
+  static uint64_t GetDataFlowAttributes(MIR* mir);
+
   /**
    * @brief Combine BasicBlocks
    * @param the BasicBlock we are considering
@@ -868,11 +1030,14 @@
   RegLocation* reg_location_;                         // Map SSA names to location.
   SafeMap<unsigned int, unsigned int> block_id_map_;  // Block collapse lookup cache.
 
-  static const uint64_t oat_data_flow_attributes_[kMirOpLast];
   static const char* extended_mir_op_names_[kMirOpLast - kMirOpFirst];
   static const uint32_t analysis_attributes_[kMirOpLast];
 
- private:
+  void HandleSSADef(int* defs, int dalvik_reg, int reg_index);
+  bool InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed);
+  void ComputeDFSOrders();
+
+ protected:
   int FindCommonParent(int block1, int block2);
   void ComputeSuccLineIn(ArenaBitVector* dest, const ArenaBitVector* src1,
                          const ArenaBitVector* src2);
@@ -882,7 +1047,7 @@
   void CompilerInitializeSSAConversion();
   bool DoSSAConversion(BasicBlock* bb);
   bool InvokeUsesMethodStar(MIR* mir);
-  int ParseInsn(const uint16_t* code_ptr, DecodedInstruction* decoded_instruction);
+  int ParseInsn(const uint16_t* code_ptr, MIR::DecodedInstruction* decoded_instruction);
   bool ContentIsInsn(const uint16_t* code_ptr);
   BasicBlock* SplitBlock(DexOffset code_offset, BasicBlock* orig_block,
                          BasicBlock** immed_pred_block_p);
@@ -898,17 +1063,14 @@
                               const uint16_t* code_end);
   int AddNewSReg(int v_reg);
   void HandleSSAUse(int* uses, int dalvik_reg, int reg_index);
-  void HandleSSADef(int* defs, int dalvik_reg, int reg_index);
   void DataFlowSSAFormat35C(MIR* mir);
   void DataFlowSSAFormat3RC(MIR* mir);
   bool FindLocalLiveIn(BasicBlock* bb);
-  bool InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed);
   bool VerifyPredInfo(BasicBlock* bb);
   BasicBlock* NeedsVisit(BasicBlock* bb);
   BasicBlock* NextUnvisitedSuccessor(BasicBlock* bb);
   void MarkPreOrder(BasicBlock* bb);
   void RecordDFSOrders(BasicBlock* bb);
-  void ComputeDFSOrders();
   void ComputeDefBlockMatrix();
   void ComputeDomPostOrderTraversal(BasicBlock* bb);
   void ComputeDominators();
@@ -946,10 +1108,10 @@
   GrowableArray<BasicBlockId>* dfs_order_;
   GrowableArray<BasicBlockId>* dfs_post_order_;
   GrowableArray<BasicBlockId>* dom_post_order_traversal_;
+  GrowableArray<BasicBlockId>* topological_order_;
   int* i_dom_list_;
   ArenaBitVector** def_block_matrix_;    // num_dalvik_register x num_blocks.
-  ArenaBitVector* temp_dalvik_register_v_;
-  UniquePtr<ScopedArenaAllocator> temp_scoped_alloc_;
+  std::unique_ptr<ScopedArenaAllocator> temp_scoped_alloc_;
   uint16_t* temp_insn_data_;
   uint32_t temp_bit_vector_size_;
   ArenaBitVector* temp_bit_vector_;
@@ -985,6 +1147,7 @@
   GrowableArray<MirIFieldLoweringInfo> ifield_lowering_infos_;
   GrowableArray<MirSFieldLoweringInfo> sfield_lowering_infos_;
   GrowableArray<MirMethodLoweringInfo> method_lowering_infos_;
+  static const uint64_t oat_data_flow_attributes_[kMirOpLast];
 
   friend class ClassInitCheckEliminationTest;
   friend class LocalValueNumberingTest;
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
index 2c33ef1..cc2bd95 100644
--- a/compiler/dex/mir_method_info.cc
+++ b/compiler/dex/mir_method_info.cc
@@ -19,10 +19,10 @@
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
 #include "driver/compiler_driver-inl.h"
-#include "mirror/class_loader.h"  // Only to allow casts in SirtRef<ClassLoader>.
-#include "mirror/dex_cache.h"     // Only to allow casts in SirtRef<DexCache>.
+#include "mirror/class_loader.h"  // Only to allow casts in Handle<ClassLoader>.
+#include "mirror/dex_cache.h"     // Only to allow casts in Handle<DexCache>.
 #include "scoped_thread_state_change.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 
 namespace art {
 
@@ -45,11 +45,12 @@
   // We're going to resolve methods and check access in a tight loop. It's better to hold
   // the lock and needed references once than re-acquiring them again and again.
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::DexCache> dex_cache(soa.Self(), compiler_driver->GetDexCache(mUnit));
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
-      compiler_driver->GetClassLoader(soa, mUnit));
-  SirtRef<mirror::Class> referrer_class(soa.Self(),
-      compiler_driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, mUnit));
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(compiler_driver->GetDexCache(mUnit)));
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(compiler_driver->GetClassLoader(soa, mUnit)));
+  Handle<mirror::Class> referrer_class(hs.NewHandle(
+      compiler_driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, mUnit)));
   // Even if the referrer class is unresolved (i.e. we're compiling a method without class
   // definition) we still want to resolve methods and record all available info.
 
@@ -73,10 +74,10 @@
 
     MethodReference target_method(mUnit->GetDexFile(), it->MethodIndex());
     int fast_path_flags = compiler_driver->IsFastInvoke(
-        soa, dex_cache, class_loader, mUnit, referrer_class.get(), resolved_method, &invoke_type,
+        soa, dex_cache, class_loader, mUnit, referrer_class.Get(), resolved_method, &invoke_type,
         &target_method, devirt_target, &it->direct_code_, &it->direct_method_);
     bool needs_clinit =
-        compiler_driver->NeedsClassInitialization(referrer_class.get(), resolved_method);
+        compiler_driver->NeedsClassInitialization(referrer_class.Get(), resolved_method);
     uint16_t other_flags = it->flags_ &
         ~(kFlagFastPath | kFlagNeedsClassInitialization | (kInvokeTypeMask << kBitSharpTypeBegin));
     it->flags_ = other_flags |
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 937e258..1d4aef2 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -19,6 +19,7 @@
 #include "dataflow_iterator-inl.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "utils/scoped_arena_containers.h"
 
 namespace art {
 
@@ -43,13 +44,13 @@
 
   for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
     // Skip pass if BB has MIR without SSA representation.
-    if (mir->ssa_rep == NULL) {
+    if (mir->ssa_rep == nullptr) {
        return;
     }
 
-    uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+    uint64_t df_attributes = GetDataFlowAttributes(mir);
 
-    DecodedInstruction *d_insn = &mir->dalvikInsn;
+    MIR::DecodedInstruction* d_insn = &mir->dalvikInsn;
 
     if (!(df_attributes & DF_HAS_DEFS)) continue;
 
@@ -239,7 +240,7 @@
 
 // FIXME - will probably need to revisit all uses of this, as type not defined.
 static const RegLocation temp_loc = {kLocCompilerTemp,
-                                     0, 1 /*defined*/, 0, 0, 0, 0, 0, 1 /*home*/, kVectorNotUsed,
+                                     0, 1 /*defined*/, 0, 0, 0, 0, 0, 1 /*home*/,
                                      RegStorage(), INVALID_SREG, INVALID_SREG};
 
 CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) {
@@ -267,13 +268,22 @@
     DCHECK_EQ(ct_type, kCompilerTempVR);
 
     // The new non-special compiler temp must receive a unique v_reg with a negative value.
-    compiler_temp->v_reg = static_cast<int>(kVRegNonSpecialTempBaseReg) - num_non_special_compiler_temps_;
+    compiler_temp->v_reg = static_cast<int>(kVRegNonSpecialTempBaseReg) -
+        num_non_special_compiler_temps_;
     compiler_temp->s_reg_low = AddNewSReg(compiler_temp->v_reg);
     num_non_special_compiler_temps_++;
 
     if (wide) {
-      // Ensure that the two registers are consecutive. Since the virtual registers used for temps grow in a
-      // negative fashion, we need the smaller to refer to the low part. Thus, we redefine the v_reg and s_reg_low.
+      // Create a new CompilerTemp for the high part.
+      CompilerTemp *compiler_temp_high =
+          static_cast<CompilerTemp *>(arena_->Alloc(sizeof(CompilerTemp), kArenaAllocRegAlloc));
+      compiler_temp_high->v_reg = compiler_temp->v_reg;
+      compiler_temp_high->s_reg_low = compiler_temp->s_reg_low;
+      compiler_temps_.Insert(compiler_temp_high);
+
+      // Ensure that the two registers are consecutive. Since the virtual registers used for temps
+      // grow in a negative fashion, we need the smaller to refer to the low part. Thus, we
+      // redefine the v_reg and s_reg_low.
       compiler_temp->v_reg--;
       int ssa_reg_high = compiler_temp->s_reg_low;
       compiler_temp->s_reg_low = AddNewSReg(compiler_temp->v_reg);
@@ -286,10 +296,6 @@
         reg_location_[ssa_reg_high].high_word = 1;
         reg_location_[ssa_reg_high].s_reg_low = ssa_reg_low;
         reg_location_[ssa_reg_high].wide = true;
-
-        // A new SSA needs new use counts.
-        use_counts_.Insert(0);
-        raw_use_counts_.Insert(0);
       }
 
       num_non_special_compiler_temps_++;
@@ -302,10 +308,6 @@
     reg_location_[ssa_reg_low] = temp_loc;
     reg_location_[ssa_reg_low].s_reg_low = ssa_reg_low;
     reg_location_[ssa_reg_low].wide = wide;
-
-    // A new SSA needs new use counts.
-    use_counts_.Insert(0);
-    raw_use_counts_.Insert(0);
   }
 
   compiler_temps_.Insert(compiler_temp);
@@ -318,7 +320,7 @@
     return true;
   }
   bool use_lvn = bb->use_lvn;
-  UniquePtr<LocalValueNumbering> local_valnum;
+  std::unique_ptr<LocalValueNumbering> local_valnum;
   if (use_lvn) {
     local_valnum.reset(LocalValueNumbering::Create(cu_));
   }
@@ -559,7 +561,7 @@
       if (mir->ssa_rep == NULL) {
         continue;
       }
-      uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+      uint64_t df_attributes = GetDataFlowAttributes(mir);
       if (df_attributes & DF_HAS_NULL_CHKS) {
         checkstats_->null_checks++;
         if (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) {
@@ -644,7 +646,7 @@
     MIR* mir = bb->last_mir_insn;
     // Grab the attributes from the paired opcode
     MIR* throw_insn = mir->meta.throw_insn;
-    uint64_t df_attributes = oat_data_flow_attributes_[throw_insn->dalvikInsn.opcode];
+    uint64_t df_attributes = GetDataFlowAttributes(throw_insn);
     bool can_combine = true;
     if (df_attributes & DF_HAS_NULL_CHKS) {
       can_combine &= ((throw_insn->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0);
@@ -743,18 +745,20 @@
       if (pred_bb->block_type == kDalvikByteCode) {
         // Check to see if predecessor had an explicit null-check.
         MIR* last_insn = pred_bb->last_mir_insn;
-        Instruction::Code last_opcode = last_insn->dalvikInsn.opcode;
-        if (last_opcode == Instruction::IF_EQZ) {
-          if (pred_bb->fall_through == bb->id) {
-            // The fall-through of a block following a IF_EQZ, set the vA of the IF_EQZ to show that
-            // it can't be null.
-            ssa_regs_to_check->ClearBit(last_insn->ssa_rep->uses[0]);
-          }
-        } else if (last_opcode == Instruction::IF_NEZ) {
-          if (pred_bb->taken == bb->id) {
-            // The taken block following a IF_NEZ, set the vA of the IF_NEZ to show that it can't be
-            // null.
-            ssa_regs_to_check->ClearBit(last_insn->ssa_rep->uses[0]);
+        if (last_insn != nullptr) {
+          Instruction::Code last_opcode = last_insn->dalvikInsn.opcode;
+          if (last_opcode == Instruction::IF_EQZ) {
+            if (pred_bb->fall_through == bb->id) {
+              // This block is the fall-through of an IF_EQZ, so the register tested by the
+              // IF_EQZ (its vA) cannot be null here.
+              ssa_regs_to_check->ClearBit(last_insn->ssa_rep->uses[0]);
+            }
+          } else if (last_opcode == Instruction::IF_NEZ) {
+            if (pred_bb->taken == bb->id) {
+              // This block is the taken successor of an IF_NEZ, so the register tested by the
+              // IF_NEZ (its vA) cannot be null here.
+              ssa_regs_to_check->ClearBit(last_insn->ssa_rep->uses[0]);
+            }
           }
         }
       }
@@ -796,7 +800,7 @@
       continue;
     }
 
-    uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+    uint64_t df_attributes = GetDataFlowAttributes(mir);
 
     // Might need a null check?
     if (df_attributes & DF_HAS_NULL_CHKS) {
@@ -903,7 +907,7 @@
           temp_scoped_alloc_.get(), temp_bit_vector_size_, false, kBitMapNullCheck);
       nce_changed = ssa_regs_to_check->GetHighestBitSet() != -1;
       bb->data_flow_info->ending_check_v->Copy(ssa_regs_to_check);
-    } else if (!ssa_regs_to_check->Equal(bb->data_flow_info->ending_check_v)) {
+    } else if (!ssa_regs_to_check->SameBitsSet(bb->data_flow_info->ending_check_v)) {
       nce_changed = true;
       bb->data_flow_info->ending_check_v->Copy(ssa_regs_to_check);
     }
@@ -970,11 +974,9 @@
       }
     };
 
-    typedef std::set<MapEntry, MapEntryComparator, ScopedArenaAllocatorAdapter<MapEntry> >
-        ClassToIndexMap;
-
     ScopedArenaAllocator allocator(&cu_->arena_stack);
-    ClassToIndexMap class_to_index_map(MapEntryComparator(), allocator.Adapter());
+    ScopedArenaSet<MapEntry, MapEntryComparator> class_to_index_map(MapEntryComparator(),
+                                                                    allocator.Adapter());
 
     // First, find all SGET/SPUTs that may need class initialization checks, record INVOKE_STATICs.
     AllNodesIterator iter(this);
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index 40ced70..86092b6 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -170,9 +170,8 @@
       }
       mir->ssa_rep = nullptr;
       mir->offset = 2 * i;  // All insns need to be at least 2 code units long.
-      mir->width = 2u;
       mir->optimization_flags = 0u;
-      merged_df_flags |= MIRGraph::oat_data_flow_attributes_[def->opcode];
+      merged_df_flags |= MIRGraph::GetDataFlowAttributes(def->opcode);
     }
     cu_.mir_graph->merged_df_flags_ = merged_df_flags;
 
diff --git a/compiler/dex/pass.h b/compiler/dex/pass.h
index 9457d5b..4ce040e 100644
--- a/compiler/dex/pass.h
+++ b/compiler/dex/pass.h
@@ -19,6 +19,7 @@
 
 #include <string>
 
+#include "base/macros.h"
 namespace art {
 
 // Forward declarations.
@@ -26,42 +27,18 @@
 struct CompilationUnit;
 class Pass;
 
-/**
- * @brief OptimizationFlag is an enumeration to perform certain tasks for a given pass.
- * @details Each enum should be a power of 2 to be correctly used.
- */
-enum OptimizationFlag {
-};
-
-enum DataFlowAnalysisMode {
-  kAllNodes = 0,                           /**< @brief All nodes. */
-  kPreOrderDFSTraversal,                   /**< @brief Depth-First-Search / Pre-Order. */
-  kRepeatingPreOrderDFSTraversal,          /**< @brief Depth-First-Search / Repeating Pre-Order. */
-  kReversePostOrderDFSTraversal,           /**< @brief Depth-First-Search / Reverse Post-Order. */
-  kRepeatingPostOrderDFSTraversal,         /**< @brief Depth-First-Search / Repeating Post-Order. */
-  kRepeatingReversePostOrderDFSTraversal,  /**< @brief Depth-First-Search / Repeating Reverse Post-Order. */
-  kPostOrderDOMTraversal,                  /**< @brief Dominator tree / Post-Order. */
-  kNoNodes,                                /**< @brief Skip BasicBlock traversal. */
+// Empty pass data class that can be extended by any pass deriving from the base Pass class.
+class PassDataHolder {
 };
 
 /**
  * @class Pass
- * @brief Pass is the Pass structure for the optimizations.
- * @details The following structure has the different optimization passes that we are going to do.
+ * @brief Base Pass class; subclasses refine how the Gate/Start/Worker/End calls are performed.
  */
 class Pass {
  public:
-  explicit Pass(const char* name, DataFlowAnalysisMode type = kAllNodes,
-                unsigned int flags = 0u, const char* dump = "")
-    : pass_name_(name), traversal_type_(type), flags_(flags), dump_cfg_folder_(dump) {
-  }
-
-  Pass(const char* name, DataFlowAnalysisMode type, const char* dump)
-    : pass_name_(name), traversal_type_(type), flags_(0), dump_cfg_folder_(dump) {
-  }
-
-  Pass(const char* name, const char* dump)
-    : pass_name_(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_(dump) {
+  explicit Pass(const char* name)
+    : pass_name_(name) {
   }
 
   virtual ~Pass() {
@@ -71,59 +48,42 @@
     return pass_name_;
   }
 
-  virtual DataFlowAnalysisMode GetTraversal() const {
-    return traversal_type_;
-  }
-
-  virtual bool GetFlag(OptimizationFlag flag) const {
-    return (flags_ & flag);
-  }
-
-  const char* GetDumpCFGFolder() const {
-    return dump_cfg_folder_;
-  }
-
   /**
    * @brief Gate for the pass: determines whether to execute the pass or not considering a CompilationUnit
-   * @param c_unit the CompilationUnit.
-   * @return whether or not to execute the pass
+   * @param data the PassDataHolder.
+   * @return whether or not to execute the pass.
    */
-  virtual bool Gate(const CompilationUnit* c_unit) const {
+  virtual bool Gate(const PassDataHolder* data) const {
     // Unused parameter.
-    UNUSED(c_unit);
+    UNUSED(data);
 
     // Base class says yes.
     return true;
   }
 
   /**
-   * @brief Start of the pass: called before the WalkBasicBlocks function
-   * @param c_unit the considered CompilationUnit.
+   * @brief Start of the pass: called before the Worker function.
    */
-  virtual void Start(CompilationUnit* c_unit) const {
+  virtual void Start(const PassDataHolder* data) const {
     // Unused parameter.
-    UNUSED(c_unit);
+    UNUSED(data);
   }
 
   /**
-   * @brief End of the pass: called after the WalkBasicBlocks function
-   * @param c_unit the considered CompilationUnit.
+   * @brief End of the pass: called after the Worker function.
    */
-  virtual void End(CompilationUnit* c_unit) const {
+  virtual void End(const PassDataHolder* data) const {
     // Unused parameter.
-    UNUSED(c_unit);
+    UNUSED(data);
   }
 
   /**
-   * @brief Actually walk the BasicBlocks following a particular traversal type.
-   * @param c_unit the CompilationUnit.
-   * @param bb the BasicBlock.
+   * @brief Worker function for the pass: actually performs the work of the pass.
+   * @param data the object containing data necessary for the pass.
    * @return whether or not there is a change when walking the BasicBlock
    */
-  virtual bool WalkBasicBlocks(CompilationUnit* c_unit, BasicBlock* bb) const {
-    // Unused parameters.
-    UNUSED(c_unit);
-    UNUSED(bb);
+  virtual bool Worker(const PassDataHolder* data) const {
+    // Unused parameter.
+    UNUSED(data);
 
     // BasicBlock did not change.
     return false;
@@ -133,15 +93,6 @@
   /** @brief The pass name: used for searching for a pass when running a particular pass or debugging. */
   const char* const pass_name_;
 
-  /** @brief Type of traversal: determines the order to execute the pass on the BasicBlocks. */
-  const DataFlowAnalysisMode traversal_type_;
-
-  /** @brief Flags for additional directives: used to determine if a particular clean-up is necessary post pass. */
-  const unsigned int flags_;
-
-  /** @brief CFG Dump Folder: what sub-folder to use for dumping the CFGs post pass. */
-  const char* const dump_cfg_folder_;
-
  private:
   // In order to make the all passes not copy-friendly.
   DISALLOW_COPY_AND_ASSIGN(Pass);
diff --git a/compiler/dex/pass_driver.cc b/compiler/dex/pass_driver.cc
deleted file mode 100644
index 999ed2a..0000000
--- a/compiler/dex/pass_driver.cc
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <dlfcn.h>
-
-#include "base/logging.h"
-#include "base/macros.h"
-#include "bb_optimizations.h"
-#include "compiler_internals.h"
-#include "dataflow_iterator.h"
-#include "dataflow_iterator-inl.h"
-#include "pass.h"
-#include "pass_driver.h"
-
-namespace art {
-
-namespace {  // anonymous namespace
-
-/**
- * @brief Helper function to create a single instance of a given Pass and can be shared across
- * the threads.
- */
-template <typename PassType>
-const Pass* GetPassInstance() {
-  static const PassType pass;
-  return &pass;
-}
-
-void DoWalkBasicBlocks(CompilationUnit* c_unit, const Pass* pass, DataflowIterator* iterator) {
-  // Paranoid: Check the iterator before walking the BasicBlocks.
-  DCHECK(iterator != nullptr);
-
-  bool change = false;
-  for (BasicBlock *bb = iterator->Next(change); bb != 0; bb = iterator->Next(change)) {
-    change = pass->WalkBasicBlocks(c_unit, bb);
-  }
-}
-
-template <typename Iterator>
-inline void DoWalkBasicBlocks(CompilationUnit* c_unit, const Pass* pass) {
-  Iterator iterator(c_unit->mir_graph.get());
-  DoWalkBasicBlocks(c_unit, pass, &iterator);
-}
-
-}  // anonymous namespace
-
-PassDriver::PassDriver(CompilationUnit* cu, bool create_default_passes)
-    : cu_(cu), dump_cfg_folder_("/sdcard/") {
-  DCHECK(cu != nullptr);
-
-  // If need be, create the default passes.
-  if (create_default_passes) {
-    CreatePasses();
-  }
-}
-
-PassDriver::~PassDriver() {
-}
-
-void PassDriver::InsertPass(const Pass* new_pass) {
-  DCHECK(new_pass != nullptr);
-  DCHECK(new_pass->GetName() != nullptr && new_pass->GetName()[0] != 0);
-
-  // It is an error to override an existing pass.
-  DCHECK(GetPass(new_pass->GetName()) == nullptr)
-      << "Pass name " << new_pass->GetName() << " already used.";
-
-  // Now add to the list.
-  pass_list_.push_back(new_pass);
-}
-
-/*
- * Create the pass list. These passes are immutable and are shared across the threads.
- *
- * Advantage is that there will be no race conditions here.
- * Disadvantage is the passes can't change their internal states depending on CompilationUnit:
- *   - This is not yet an issue: no current pass would require it.
- */
-static const Pass* const gPasses[] = {
-  GetPassInstance<CacheFieldLoweringInfo>(),
-  GetPassInstance<CacheMethodLoweringInfo>(),
-  GetPassInstance<CallInlining>(),
-  GetPassInstance<CodeLayout>(),
-  GetPassInstance<SSATransformation>(),
-  GetPassInstance<ConstantPropagation>(),
-  GetPassInstance<InitRegLocations>(),
-  GetPassInstance<MethodUseCount>(),
-  GetPassInstance<NullCheckEliminationAndTypeInference>(),
-  GetPassInstance<ClassInitCheckElimination>(),
-  GetPassInstance<BBCombine>(),
-  GetPassInstance<BBOptimizations>(),
-};
-
-// The default pass list is used by CreatePasses to initialize pass_list_.
-static std::vector<const Pass*> gDefaultPassList(gPasses, gPasses + arraysize(gPasses));
-
-void PassDriver::CreateDefaultPassList(const std::string& disable_passes) {
-  // Insert each pass from gPasses into gDefaultPassList.
-  gDefaultPassList.clear();
-  gDefaultPassList.reserve(arraysize(gPasses));
-  for (const Pass* pass : gPasses) {
-    // Check if we should disable this pass.
-    if (disable_passes.find(pass->GetName()) != std::string::npos) {
-      LOG(INFO) << "Skipping " << pass->GetName();
-    } else {
-      gDefaultPassList.push_back(pass);
-    }
-  }
-}
-
-void PassDriver::CreatePasses() {
-  // Insert each pass into the list via the InsertPass method.
-  pass_list_.reserve(gDefaultPassList.size());
-  for (const Pass* pass : gDefaultPassList) {
-    InsertPass(pass);
-  }
-}
-
-void PassDriver::HandlePassFlag(CompilationUnit* c_unit, const Pass* pass) {
-  // Unused parameters for the moment.
-  UNUSED(c_unit);
-  UNUSED(pass);
-}
-
-void PassDriver::DispatchPass(CompilationUnit* c_unit, const Pass* curPass) {
-  VLOG(compiler) << "Dispatching " << curPass->GetName();
-
-  DataFlowAnalysisMode mode = curPass->GetTraversal();
-
-  switch (mode) {
-    case kPreOrderDFSTraversal:
-      DoWalkBasicBlocks<PreOrderDfsIterator>(c_unit, curPass);
-      break;
-    case kRepeatingPreOrderDFSTraversal:
-      DoWalkBasicBlocks<RepeatingPreOrderDfsIterator>(c_unit, curPass);
-      break;
-    case kRepeatingPostOrderDFSTraversal:
-      DoWalkBasicBlocks<RepeatingPostOrderDfsIterator>(c_unit, curPass);
-      break;
-    case kReversePostOrderDFSTraversal:
-      DoWalkBasicBlocks<ReversePostOrderDfsIterator>(c_unit, curPass);
-      break;
-    case kRepeatingReversePostOrderDFSTraversal:
-      DoWalkBasicBlocks<RepeatingReversePostOrderDfsIterator>(c_unit, curPass);
-      break;
-    case kPostOrderDOMTraversal:
-      DoWalkBasicBlocks<PostOrderDOMIterator>(c_unit, curPass);
-      break;
-    case kAllNodes:
-      DoWalkBasicBlocks<AllNodesIterator>(c_unit, curPass);
-      break;
-    case kNoNodes:
-      break;
-    default:
-      LOG(FATAL) << "Iterator mode not handled in dispatcher: " << mode;
-      break;
-  }
-}
-
-void PassDriver::ApplyPass(CompilationUnit* c_unit, const Pass* curPass) {
-  curPass->Start(c_unit);
-  DispatchPass(c_unit, curPass);
-  curPass->End(c_unit);
-}
-
-bool PassDriver::RunPass(CompilationUnit* c_unit, const Pass* pass, bool time_split) {
-  // Paranoid: c_unit and pass cannot be nullptr, and the pass should have a name.
-  DCHECK(c_unit != nullptr);
-  DCHECK(pass != nullptr);
-  DCHECK(pass->GetName() != nullptr && pass->GetName()[0] != 0);
-
-  // Do we perform a time split
-  if (time_split) {
-    c_unit->NewTimingSplit(pass->GetName());
-  }
-
-  // Check the pass gate first.
-  bool should_apply_pass = pass->Gate(c_unit);
-
-  if (should_apply_pass) {
-    // Applying the pass: first start, doWork, and end calls.
-    ApplyPass(c_unit, pass);
-
-    // Clean up if need be.
-    HandlePassFlag(c_unit, pass);
-
-    // Do we want to log it?
-    if ((c_unit->enable_debug&  (1 << kDebugDumpCFG)) != 0) {
-      // Do we have a pass folder?
-      const char* passFolder = pass->GetDumpCFGFolder();
-      DCHECK(passFolder != nullptr);
-
-      if (passFolder[0] != 0) {
-        // Create directory prefix.
-        std::string prefix = GetDumpCFGFolder();
-        prefix += passFolder;
-        prefix += "/";
-
-        c_unit->mir_graph->DumpCFG(prefix.c_str(), false);
-      }
-    }
-  }
-
-  // If the pass gate passed, we can declare success.
-  return should_apply_pass;
-}
-
-bool PassDriver::RunPass(CompilationUnit* c_unit, const char* pass_name) {
-  // Paranoid: c_unit cannot be nullptr and we need a pass name.
-  DCHECK(c_unit != nullptr);
-  DCHECK(pass_name != nullptr && pass_name[0] != 0);
-
-  const Pass* cur_pass = GetPass(pass_name);
-
-  if (cur_pass != nullptr) {
-    return RunPass(c_unit, cur_pass);
-  }
-
-  // Return false, we did not find the pass.
-  return false;
-}
-
-void PassDriver::Launch() {
-  for (const Pass* cur_pass : pass_list_) {
-    RunPass(cu_, cur_pass, true);
-  }
-}
-
-void PassDriver::PrintPassNames() {
-  LOG(INFO) << "Loop Passes are:";
-
-  for (const Pass* cur_pass : gPasses) {
-    LOG(INFO) << "\t-" << cur_pass->GetName();
-  }
-}
-
-const Pass* PassDriver::GetPass(const char* name) const {
-  for (const Pass* cur_pass : pass_list_) {
-    if (strcmp(name, cur_pass->GetName()) == 0) {
-      return cur_pass;
-    }
-  }
-  return nullptr;
-}
-
-}  // namespace art
diff --git a/compiler/dex/pass_driver.h b/compiler/dex/pass_driver.h
index 2b7196e..aa0d1ae 100644
--- a/compiler/dex/pass_driver.h
+++ b/compiler/dex/pass_driver.h
@@ -22,77 +22,169 @@
 #include "safe_map.h"
 
 // Forward Declarations.
-class CompilationUnit;
 class Pass;
-
+class PassDriver;
 namespace art {
+/**
+ * @brief Helper function to create a single instance of a given Pass that can be shared
+ * across threads.
+ */
+template <typename PassType>
+const Pass* GetPassInstance() {
+  static const PassType pass;
+  return &pass;
+}
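+
+// For example, a driver shares one immutable instance per pass type:
+//   const Pass* layout = GetPassInstance<CodeLayout>();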
+
+// Empty holder for the constructor.
+class PassDriverDataHolder {
+};
 
 /**
  * @class PassDriver
- * @brief PassDriver is the wrapper around all Pass instances in order to execute them from the Middle-End
+ * @brief PassDriver is the wrapper around all Pass instances in order to execute them
  */
+template <typename PassDriverType>
 class PassDriver {
  public:
-  explicit PassDriver(CompilationUnit* cu, bool create_default_passes = true);
+  explicit PassDriver() {
+    InitializePasses();
+  }
 
-  ~PassDriver();
+  virtual ~PassDriver() {
+  }
 
   /**
    * @brief Insert a Pass: can warn if multiple passes have the same name.
-   * @param new_pass the new Pass to insert in the map and list.
-   * @param warn_override warn if the name of the Pass is already used.
    */
-  void InsertPass(const Pass* new_pass);
+  void InsertPass(const Pass* new_pass) {
+    DCHECK(new_pass != nullptr);
+    DCHECK(new_pass->GetName() != nullptr && new_pass->GetName()[0] != 0);
+
+    // It is an error to override an existing pass.
+    DCHECK(GetPass(new_pass->GetName()) == nullptr)
+        << "Pass name " << new_pass->GetName() << " already used.";
+
+    // Now add to the list.
+    pass_list_.push_back(new_pass);
+  }
 
   /**
    * @brief Run a pass using the name as key.
-   * @param c_unit the considered CompilationUnit.
-   * @param pass_name the Pass name.
    * @return whether the pass was applied.
    */
-  bool RunPass(CompilationUnit* c_unit, const char* pass_name);
+  virtual bool RunPass(const char* pass_name) {
+    // Paranoid: we need a non-empty pass name.
+    DCHECK(pass_name != nullptr && pass_name[0] != 0);
+
+    const Pass* cur_pass = GetPass(pass_name);
+
+    if (cur_pass != nullptr) {
+      return RunPass(cur_pass);
+    }
+
+    // Return false, we did not find the pass.
+    return false;
+  }
+
+  /**
+   * @brief Runs all the passes in pass_list_.
+   */
+  void Launch() {
+    for (const Pass* cur_pass : pass_list_) {
+      RunPass(cur_pass);
+    }
+  }
+
+  /**
+   * @brief Searches for a particular pass.
+   * @param name the name of the pass to be searched for.
+   */
+  const Pass* GetPass(const char* name) const {
+    for (const Pass* cur_pass : pass_list_) {
+      if (strcmp(name, cur_pass->GetName()) == 0) {
+        return cur_pass;
+      }
+    }
+    return nullptr;
+  }
+
+  static void CreateDefaultPassList(const std::string& disable_passes) {
+    // Insert each pass from g_passes into g_default_pass_list.
+    PassDriverType::g_default_pass_list.clear();
+    PassDriverType::g_default_pass_list.reserve(PassDriver<PassDriverType>::g_passes_size);
+    for (uint16_t i = 0; i < PassDriver<PassDriverType>::g_passes_size; ++i) {
+      const Pass* pass = PassDriver<PassDriverType>::g_passes[i];
+      // Check if we should disable this pass.
+      if (disable_passes.find(pass->GetName()) != std::string::npos) {
+        LOG(INFO) << "Skipping " << pass->GetName();
+      } else {
+        PassDriver<PassDriverType>::g_default_pass_list.push_back(pass);
+      }
+    }
+  }
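+
+  // For example, CreateDefaultPassList("BBCombine,BBOptimizations") builds the default list
+  // without those two passes; any pass whose name appears as a substring of the argument
+  // is skipped.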
 
   /**
    * @brief Run a pass using the Pass itself.
    * @param time_split do we want a time split request(default: false)?
    * @return whether the pass was applied.
    */
-  bool RunPass(CompilationUnit* c_unit, const Pass* pass, bool time_split = false);
+  virtual bool RunPass(const Pass* pass, bool time_split = false) = 0;
 
-  void Launch();
+  /**
+   * @brief Print the pass names of all the passes available.
+   */
+  static void PrintPassNames() {
+    LOG(INFO) << "Loop Passes are:";
 
-  void HandlePassFlag(CompilationUnit* c_unit, const Pass* pass);
+    for (const Pass* cur_pass : PassDriver<PassDriverType>::g_default_pass_list) {
+      LOG(INFO) << "\t-" << cur_pass->GetName();
+    }
+  }
+
+ protected:
+  /**
+   * @brief Gets the list of passes currently scheduled to execute.
+   * @return pass_list_
+   */
+  std::vector<const Pass*>& GetPasses() {
+    return pass_list_;
+  }
+
+  virtual void InitializePasses() {
+    SetDefaultPasses();
+  }
+
+  void SetDefaultPasses() {
+    pass_list_ = PassDriver<PassDriverType>::g_default_pass_list;
+  }
 
   /**
    * @brief Apply a pass: perform start/work/end functions.
    */
-  void ApplyPass(CompilationUnit* c_unit, const Pass* pass);
-
-  /**
-   * @brief Dispatch a patch: walk the BasicBlocks depending on the traversal mode
-   */
-  void DispatchPass(CompilationUnit* c_unit, const Pass* pass);
-
-  static void PrintPassNames();
-  static void CreateDefaultPassList(const std::string& disable_passes);
-
-  const Pass* GetPass(const char* name) const;
-
-  const char* GetDumpCFGFolder() const {
-    return dump_cfg_folder_;
+  virtual void ApplyPass(PassDataHolder* data, const Pass* pass) {
+    pass->Start(data);
+    DispatchPass(pass);
+    pass->End(data);
   }
-
- protected:
-  void CreatePasses();
+  /**
+   * @brief Dispatch a pass.
+   * Gives subclasses the ability to add logic when running the pass.
+   */
+  virtual void DispatchPass(const Pass* pass) {
+    UNUSED(pass);
+  }
 
   /** @brief List of passes: provides the order to execute the passes. */
   std::vector<const Pass*> pass_list_;
 
-  /** @brief The CompilationUnit on which to execute the passes on. */
-  CompilationUnit* const cu_;
+  /** @brief The number of passes within g_passes.  */
+  static const uint16_t g_passes_size;
 
-  /** @brief Dump CFG base folder: where is the base folder for dumping CFGs. */
-  const char* dump_cfg_folder_;
+  /** @brief The initial list of passes: immutable and shared across threads.  */
+  static const Pass* const g_passes[];
+
+  /** @brief The default pass list is used to initialize pass_list_. */
+  static std::vector<const Pass*> g_default_pass_list;
 };
 
 }  // namespace art
diff --git a/compiler/dex/pass_driver_me.cc b/compiler/dex/pass_driver_me.cc
new file mode 100644
index 0000000..d054500
--- /dev/null
+++ b/compiler/dex/pass_driver_me.cc
@@ -0,0 +1,170 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/macros.h"
+#include "bb_optimizations.h"
+#include "compiler_internals.h"
+#include "dataflow_iterator.h"
+#include "dataflow_iterator-inl.h"
+#include "pass_driver_me.h"
+
+namespace art {
+
+namespace {  // anonymous namespace
+
+void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass, DataflowIterator* iterator) {
+  // Paranoid: Check the iterator before walking the BasicBlocks.
+  DCHECK(iterator != nullptr);
+  bool change = false;
+  for (BasicBlock* bb = iterator->Next(change); bb != nullptr; bb = iterator->Next(change)) {
+    data->bb = bb;
+    change = pass->Worker(data);
+  }
+}
+
+template <typename Iterator>
+inline void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass) {
+  DCHECK(data != nullptr);
+  CompilationUnit* c_unit = data->c_unit;
+  DCHECK(c_unit != nullptr);
+  Iterator iterator(c_unit->mir_graph.get());
+  DoWalkBasicBlocks(data, pass, &iterator);
+}
+}  // anonymous namespace
+
+/*
+ * Create the pass list. These passes are immutable and are shared across the threads.
+ *
+ * Advantage is that there will be no race conditions here.
+ * Disadvantage is the passes can't change their internal states depending on CompilationUnit:
+ *   - This is not yet an issue: no current pass would require it.
+ */
+// The initial list of passes to be used by the PassDriverME.
+template<>
+const Pass* const PassDriver<PassDriverME>::g_passes[] = {
+  GetPassInstance<CacheFieldLoweringInfo>(),
+  GetPassInstance<CacheMethodLoweringInfo>(),
+  GetPassInstance<CallInlining>(),
+  GetPassInstance<CodeLayout>(),
+  GetPassInstance<SSATransformation>(),
+  GetPassInstance<ConstantPropagation>(),
+  GetPassInstance<InitRegLocations>(),
+  GetPassInstance<MethodUseCount>(),
+  GetPassInstance<NullCheckEliminationAndTypeInference>(),
+  GetPassInstance<ClassInitCheckElimination>(),
+  GetPassInstance<BBCombine>(),
+  GetPassInstance<BBOptimizations>(),
+};
+
+// The number of passes in the initial list of passes (g_passes).
+template<>
+uint16_t const PassDriver<PassDriverME>::g_passes_size =
+    arraysize(PassDriver<PassDriverME>::g_passes);
+
+// The default pass list is used by the PassDriverME instance of PassDriver to initialize pass_list_.
+template<>
+std::vector<const Pass*> PassDriver<PassDriverME>::g_default_pass_list(
+    PassDriver<PassDriverME>::g_passes,
+    PassDriver<PassDriverME>::g_passes + PassDriver<PassDriverME>::g_passes_size);
+
+PassDriverME::PassDriverME(CompilationUnit* cu)
+    : PassDriver(), pass_me_data_holder_(), dump_cfg_folder_("/sdcard/") {
+  pass_me_data_holder_.bb = nullptr;
+  pass_me_data_holder_.c_unit = cu;
+}
+
+PassDriverME::~PassDriverME() {
+}
+
+void PassDriverME::DispatchPass(const Pass* pass) {
+  VLOG(compiler) << "Dispatching " << pass->GetName();
+  const PassME* me_pass = down_cast<const PassME*>(pass);
+
+  DataFlowAnalysisMode mode = me_pass->GetTraversal();
+
+  switch (mode) {
+    case kPreOrderDFSTraversal:
+      DoWalkBasicBlocks<PreOrderDfsIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kRepeatingPreOrderDFSTraversal:
+      DoWalkBasicBlocks<RepeatingPreOrderDfsIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kRepeatingPostOrderDFSTraversal:
+      DoWalkBasicBlocks<RepeatingPostOrderDfsIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kReversePostOrderDFSTraversal:
+      DoWalkBasicBlocks<ReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kRepeatingReversePostOrderDFSTraversal:
+      DoWalkBasicBlocks<RepeatingReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kPostOrderDOMTraversal:
+      DoWalkBasicBlocks<PostOrderDOMIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kAllNodes:
+      DoWalkBasicBlocks<AllNodesIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kNoNodes:
+      break;
+    default:
+      LOG(FATAL) << "Iterator mode not handled in dispatcher: " << mode;
+      break;
+  }
+}
+
+bool PassDriverME::RunPass(const Pass* pass, bool time_split) {
+  // Paranoid: c_unit and pass cannot be nullptr, and the pass should have a name
+  DCHECK(pass != nullptr);
+  DCHECK(pass->GetName() != nullptr && pass->GetName()[0] != 0);
+  CompilationUnit* c_unit = pass_me_data_holder_.c_unit;
+  DCHECK(c_unit != nullptr);
+
+  // Do we perform a time split
+  if (time_split) {
+    c_unit->NewTimingSplit(pass->GetName());
+  }
+
+  // Check the pass gate first.
+  bool should_apply_pass = pass->Gate(&pass_me_data_holder_);
+  if (should_apply_pass) {
+    // Applying the pass: first start, doWork, and end calls.
+    ApplyPass(&pass_me_data_holder_, pass);
+
+    // Do we want to log it?
+    if ((c_unit->enable_debug & (1 << kDebugDumpCFG)) != 0) {
+      // Do we have a pass folder?
+      const PassME* me_pass = (down_cast<const PassME*>(pass));
+      const char* passFolder = me_pass->GetDumpCFGFolder();
+      DCHECK(passFolder != nullptr);
+
+      if (passFolder[0] != 0) {
+        // Create directory prefix.
+        std::string prefix = GetDumpCFGFolder();
+        prefix += passFolder;
+        prefix += "/";
+
+        c_unit->mir_graph->DumpCFG(prefix.c_str(), false);
+      }
+    }
+  }
+
+  // If the pass gate passed, we can declare success.
+  return should_apply_pass;
+}
+
+const char* PassDriverME::GetDumpCFGFolder() const {
+  return dump_cfg_folder_;
+}
+
+
+}  // namespace art
diff --git a/compiler/dex/pass_driver_me.h b/compiler/dex/pass_driver_me.h
new file mode 100644
index 0000000..0142934
--- /dev/null
+++ b/compiler/dex/pass_driver_me.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_PASS_DRIVER_ME_H_
+#define ART_COMPILER_DEX_PASS_DRIVER_ME_H_
+
+#include "bb_optimizations.h"
+#include "pass_driver.h"
+#include "pass_me.h"
+
+namespace art {
+
+class PassDriverME: public PassDriver<PassDriverME> {
+ public:
+  explicit PassDriverME(CompilationUnit* cu);
+  ~PassDriverME();
+  /**
+   * @brief Dispatch a pass: walk the BasicBlocks depending on the traversal mode.
+   */
+  void DispatchPass(const Pass* pass);
+  bool RunPass(const Pass* pass, bool time_split = false);
+  const char* GetDumpCFGFolder() const;
+ protected:
+  /** @brief The data holder that contains data needed for the PassDriverME. */
+  PassMEDataHolder pass_me_data_holder_;
+
+  /** @brief Dump CFG base folder: where is the base folder for dumping CFGs. */
+  const char* dump_cfg_folder_;
+};
+
+}  // namespace art
+#endif  // ART_COMPILER_DEX_PASS_DRIVER_ME_H_
diff --git a/compiler/dex/pass_me.h b/compiler/dex/pass_me.h
new file mode 100644
index 0000000..069fb45
--- /dev/null
+++ b/compiler/dex/pass_me.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_PASS_ME_H_
+#define ART_COMPILER_DEX_PASS_ME_H_
+
+#include <string>
+#include "pass.h"
+
+namespace art {
+
+// Forward declarations.
+struct BasicBlock;
+struct CompilationUnit;
+class Pass;
+
+/**
+ * @brief OptimizationFlag is an enumeration to perform certain tasks for a given pass.
+ * @details Each enum should be a power of 2 to be correctly used.
+ */
+enum OptimizationFlag {
+};
+
+// Data holder class.
+class PassMEDataHolder: public PassDataHolder {
+  public:
+    CompilationUnit* c_unit;
+    BasicBlock* bb;
+};
+
+enum DataFlowAnalysisMode {
+  kAllNodes = 0,                           /**< @brief All nodes. */
+  kPreOrderDFSTraversal,                   /**< @brief Depth-First-Search / Pre-Order. */
+  kRepeatingPreOrderDFSTraversal,          /**< @brief Depth-First-Search / Repeating Pre-Order. */
+  kReversePostOrderDFSTraversal,           /**< @brief Depth-First-Search / Reverse Post-Order. */
+  kRepeatingPostOrderDFSTraversal,         /**< @brief Depth-First-Search / Repeating Post-Order. */
+  kRepeatingReversePostOrderDFSTraversal,  /**< @brief Depth-First-Search / Repeating Reverse Post-Order. */
+  kPostOrderDOMTraversal,                  /**< @brief Dominator tree / Post-Order. */
+  kTopologicalSortTraversal,               /**< @brief Topological Order traversal. */
+  kRepeatingTopologicalSortTraversal,      /**< @brief Repeating Topological Order traversal. */
+  kNoNodes,                                /**< @brief Skip BasicBlock traversal. */
+};
+
+/**
+ * @class PassME
+ * @brief PassME is the Pass structure used by the middle-end optimizations.
+ * @details It extends Pass with a traversal mode, optimization flags, and a CFG dump folder.
+ */
+class PassME: public Pass {
+ public:
+  explicit PassME(const char* name, DataFlowAnalysisMode type = kAllNodes,
+          unsigned int flags = 0u, const char* dump = "")
+    : Pass(name), traversal_type_(type), flags_(flags), dump_cfg_folder_(dump) {
+  }
+
+  PassME(const char* name, DataFlowAnalysisMode type, const char* dump)
+    : Pass(name), traversal_type_(type), flags_(0), dump_cfg_folder_(dump) {
+  }
+
+  PassME(const char* name, const char* dump)
+    : Pass(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_(dump) {
+  }
+
+  ~PassME() {
+  }
+
+  virtual DataFlowAnalysisMode GetTraversal() const {
+    return traversal_type_;
+  }
+
+  const char* GetDumpCFGFolder() const {
+    return dump_cfg_folder_;
+  }
+
+  bool GetFlag(OptimizationFlag flag) const {
+    return (flags_ & flag);
+  }
+
+ protected:
+  /** @brief Type of traversal: determines the order to execute the pass on the BasicBlocks. */
+  const DataFlowAnalysisMode traversal_type_;
+
+  /** @brief Flags for additional directives: used to determine if a particular clean-up is necessary post pass. */
+  const unsigned int flags_;
+
+  /** @brief CFG Dump Folder: what sub-folder to use for dumping the CFGs post pass. */
+  const char* const dump_cfg_folder_;
+};
+}  // namespace art
+#endif  // ART_COMPILER_DEX_PASS_ME_H_
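A rough sketch of how the PassME constructors above are meant to be used; the pass name, traversal mode, and dump folder below are hypothetical stand-ins for illustration, not part of this change.

#include "pass_me.h"  // assumes compiler/dex is on the include path

// Sketch only: declares a middle-end pass that asks for a repeating
// pre-order DFS traversal and its own CFG dump sub-folder.
class NullCheckSketchPass : public art::PassME {
 public:
  NullCheckSketchPass()
      : PassME("NCE_Sketch", art::kRepeatingPreOrderDFSTraversal, 0u, "nce_sketch_cfg") {}
};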
diff --git a/compiler/dex/portable/mir_to_gbc.cc b/compiler/dex/portable/mir_to_gbc.cc
index 70438ec..576e242 100644
--- a/compiler/dex/portable/mir_to_gbc.cc
+++ b/compiler/dex/portable/mir_to_gbc.cc
@@ -722,7 +722,7 @@
   /* Prep Src and Dest locations */
   int next_sreg = 0;
   int next_loc = 0;
-  uint64_t attrs = mir_graph_->oat_data_flow_attributes_[opcode];
+  uint64_t attrs = MirGraph::GetDataFlowAttributes(opcode);
   rl_src[0] = rl_src[1] = rl_src[2] = mir_graph_->GetBadLoc();
   if (attrs & DF_UA) {
     if (attrs & DF_A_WIDE) {
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index c9acd66..e384f6b 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -93,29 +93,8 @@
  * +========================+
  */
 
-// Offset to distingish FP regs.
-#define ARM_FP_REG_OFFSET 32
-// Offset to distinguish DP FP regs.
-#define ARM_FP_DOUBLE 64
 // First FP callee save.
 #define ARM_FP_CALLEE_SAVE_BASE 16
-// Reg types.
-#define ARM_REGTYPE(x) (x & (ARM_FP_REG_OFFSET | ARM_FP_DOUBLE))
-#define ARM_FPREG(x) ((x & ARM_FP_REG_OFFSET) == ARM_FP_REG_OFFSET)
-#define ARM_LOWREG(x) ((x & 0x7) == x)
-#define ARM_DOUBLEREG(x) ((x & ARM_FP_DOUBLE) == ARM_FP_DOUBLE)
-#define ARM_SINGLEREG(x) (ARM_FPREG(x) && !ARM_DOUBLEREG(x))
-
-/*
- * Note: the low register of a floating point pair is sufficient to
- * create the name of a double, but require both names to be passed to
- * allow for asserts to verify that the pair is consecutive if significant
- * rework is done in this area.  Also, it is a good reminder in the calling
- * code that reg locations always describe doubles as a pair of singles.
- */
-#define ARM_S2D(x, y) ((x) | ARM_FP_DOUBLE)
-// Mask to strip off fp flags.
-#define ARM_FP_REG_MASK (ARM_FP_REG_OFFSET-1)
 
 enum ArmResourceEncodingPos {
   kArmGPReg0   = 0,
@@ -134,135 +113,197 @@
 #define ENCODE_ARM_REG_FPCS_LIST(N) (static_cast<uint64_t>(N) << kArmFPReg16)
 
 enum ArmNativeRegisterPool {
-  r0   = 0,
-  r1   = 1,
-  r2   = 2,
-  r3   = 3,
-  rARM_SUSPEND = 4,
-  r5   = 5,
-  r6   = 6,
-  r7   = 7,
-  r8   = 8,
-  rARM_SELF  = 9,
-  r10  = 10,
-  r11  = 11,
-  r12  = 12,
-  r13sp  = 13,
-  rARM_SP  = 13,
-  r14lr  = 14,
-  rARM_LR  = 14,
-  r15pc  = 15,
-  rARM_PC  = 15,
-  fr0  =  0 + ARM_FP_REG_OFFSET,
-  fr1  =  1 + ARM_FP_REG_OFFSET,
-  fr2  =  2 + ARM_FP_REG_OFFSET,
-  fr3  =  3 + ARM_FP_REG_OFFSET,
-  fr4  =  4 + ARM_FP_REG_OFFSET,
-  fr5  =  5 + ARM_FP_REG_OFFSET,
-  fr6  =  6 + ARM_FP_REG_OFFSET,
-  fr7  =  7 + ARM_FP_REG_OFFSET,
-  fr8  =  8 + ARM_FP_REG_OFFSET,
-  fr9  =  9 + ARM_FP_REG_OFFSET,
-  fr10 = 10 + ARM_FP_REG_OFFSET,
-  fr11 = 11 + ARM_FP_REG_OFFSET,
-  fr12 = 12 + ARM_FP_REG_OFFSET,
-  fr13 = 13 + ARM_FP_REG_OFFSET,
-  fr14 = 14 + ARM_FP_REG_OFFSET,
-  fr15 = 15 + ARM_FP_REG_OFFSET,
-  fr16 = 16 + ARM_FP_REG_OFFSET,
-  fr17 = 17 + ARM_FP_REG_OFFSET,
-  fr18 = 18 + ARM_FP_REG_OFFSET,
-  fr19 = 19 + ARM_FP_REG_OFFSET,
-  fr20 = 20 + ARM_FP_REG_OFFSET,
-  fr21 = 21 + ARM_FP_REG_OFFSET,
-  fr22 = 22 + ARM_FP_REG_OFFSET,
-  fr23 = 23 + ARM_FP_REG_OFFSET,
-  fr24 = 24 + ARM_FP_REG_OFFSET,
-  fr25 = 25 + ARM_FP_REG_OFFSET,
-  fr26 = 26 + ARM_FP_REG_OFFSET,
-  fr27 = 27 + ARM_FP_REG_OFFSET,
-  fr28 = 28 + ARM_FP_REG_OFFSET,
-  fr29 = 29 + ARM_FP_REG_OFFSET,
-  fr30 = 30 + ARM_FP_REG_OFFSET,
-  fr31 = 31 + ARM_FP_REG_OFFSET,
-  dr0 = fr0 + ARM_FP_DOUBLE,
-  dr1 = fr2 + ARM_FP_DOUBLE,
-  dr2 = fr4 + ARM_FP_DOUBLE,
-  dr3 = fr6 + ARM_FP_DOUBLE,
-  dr4 = fr8 + ARM_FP_DOUBLE,
-  dr5 = fr10 + ARM_FP_DOUBLE,
-  dr6 = fr12 + ARM_FP_DOUBLE,
-  dr7 = fr14 + ARM_FP_DOUBLE,
-  dr8 = fr16 + ARM_FP_DOUBLE,
-  dr9 = fr18 + ARM_FP_DOUBLE,
-  dr10 = fr20 + ARM_FP_DOUBLE,
-  dr11 = fr22 + ARM_FP_DOUBLE,
-  dr12 = fr24 + ARM_FP_DOUBLE,
-  dr13 = fr26 + ARM_FP_DOUBLE,
-  dr14 = fr28 + ARM_FP_DOUBLE,
-  dr15 = fr30 + ARM_FP_DOUBLE,
+  r0           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  0,
+  r1           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  1,
+  r2           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  2,
+  r3           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  3,
+  rARM_SUSPEND = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  4,
+  r5           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  5,
+  r6           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  6,
+  r7           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  7,
+  r8           = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  8,
+  rARM_SELF    = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  9,
+  r10          = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 10,
+  r11          = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 11,
+  r12          = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 12,
+  r13sp        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 13,
+  rARM_SP      = r13sp,
+  r14lr        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 14,
+  rARM_LR      = r14lr,
+  r15pc        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 15,
+  rARM_PC      = r15pc,
+
+  fr0          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  0,
+  fr1          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  1,
+  fr2          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  2,
+  fr3          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  3,
+  fr4          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  4,
+  fr5          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  5,
+  fr6          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  6,
+  fr7          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  7,
+  fr8          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  8,
+  fr9          = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  9,
+  fr10         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10,
+  fr11         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 11,
+  fr12         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 12,
+  fr13         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13,
+  fr14         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14,
+  fr15         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15,
+  fr16         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 16,
+  fr17         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 17,
+  fr18         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 18,
+  fr19         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 19,
+  fr20         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 20,
+  fr21         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 21,
+  fr22         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 22,
+  fr23         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 23,
+  fr24         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 24,
+  fr25         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 25,
+  fr26         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 26,
+  fr27         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 27,
+  fr28         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 28,
+  fr29         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 29,
+  fr30         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 30,
+  fr31         = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 31,
+
+  dr0          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  0,
+  dr1          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  1,
+  dr2          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
+  dr3          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  3,
+  dr4          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  4,
+  dr5          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  5,
+  dr6          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  6,
+  dr7          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  7,
+  dr8          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  8,
+  dr9          = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  9,
+  dr10         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
+  dr11         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11,
+  dr12         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
+  dr13         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13,
+  dr14         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
+  dr15         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15,
+#if 0
+  // Enable when def/use and runtime able to handle these.
+  dr16         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 16,
+  dr17         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 17,
+  dr18         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 18,
+  dr19         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 19,
+  dr20         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 20,
+  dr21         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 21,
+  dr22         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 22,
+  dr23         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 23,
+  dr24         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 24,
+  dr25         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 25,
+  dr26         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 26,
+  dr27         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 27,
+  dr28         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 28,
+  dr29         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 29,
+  dr30         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 30,
+  dr31         = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 31,
+#endif
 };
 
-// TODO: clean this up; reduce use of or eliminate macros
+constexpr RegStorage rs_r0(RegStorage::kValid | r0);
+constexpr RegStorage rs_r1(RegStorage::kValid | r1);
+constexpr RegStorage rs_r2(RegStorage::kValid | r2);
+constexpr RegStorage rs_r3(RegStorage::kValid | r3);
+constexpr RegStorage rs_rARM_SUSPEND(RegStorage::kValid | rARM_SUSPEND);
+constexpr RegStorage rs_r5(RegStorage::kValid | r5);
+constexpr RegStorage rs_r6(RegStorage::kValid | r6);
+constexpr RegStorage rs_r7(RegStorage::kValid | r7);
+constexpr RegStorage rs_r8(RegStorage::kValid | r8);
+constexpr RegStorage rs_rARM_SELF(RegStorage::kValid | rARM_SELF);
+constexpr RegStorage rs_r10(RegStorage::kValid | r10);
+constexpr RegStorage rs_r11(RegStorage::kValid | r11);
+constexpr RegStorage rs_r12(RegStorage::kValid | r12);
+constexpr RegStorage rs_r13sp(RegStorage::kValid | r13sp);
+constexpr RegStorage rs_rARM_SP(RegStorage::kValid | rARM_SP);
+constexpr RegStorage rs_r14lr(RegStorage::kValid | r14lr);
+constexpr RegStorage rs_rARM_LR(RegStorage::kValid | rARM_LR);
+constexpr RegStorage rs_r15pc(RegStorage::kValid | r15pc);
+constexpr RegStorage rs_rARM_PC(RegStorage::kValid | rARM_PC);
+constexpr RegStorage rs_invalid(RegStorage::kInvalid);
 
-const RegStorage rs_r0(RegStorage::k32BitSolo, r0);
-const RegStorage rs_r1(RegStorage::k32BitSolo, r1);
-const RegStorage rs_r2(RegStorage::k32BitSolo, r2);
-const RegStorage rs_r3(RegStorage::k32BitSolo, r3);
-const RegStorage rs_rARM_SUSPEND(RegStorage::k32BitSolo, rARM_SUSPEND);
-const RegStorage rs_r5(RegStorage::k32BitSolo, r5);
-const RegStorage rs_r6(RegStorage::k32BitSolo, r6);
-const RegStorage rs_r7(RegStorage::k32BitSolo, r7);
-const RegStorage rs_r8(RegStorage::k32BitSolo, r8);
-const RegStorage rs_rARM_SELF(RegStorage::k32BitSolo, rARM_SELF);
-const RegStorage rs_r10(RegStorage::k32BitSolo, r10);
-const RegStorage rs_r11(RegStorage::k32BitSolo, r11);
-const RegStorage rs_r12(RegStorage::k32BitSolo, r12);
-const RegStorage rs_r13sp(RegStorage::k32BitSolo, r13sp);
-const RegStorage rs_rARM_SP(RegStorage::k32BitSolo, rARM_SP);
-const RegStorage rs_r14lr(RegStorage::k32BitSolo, r14lr);
-const RegStorage rs_rARM_LR(RegStorage::k32BitSolo, rARM_LR);
-const RegStorage rs_r15pc(RegStorage::k32BitSolo, r15pc);
-const RegStorage rs_rARM_PC(RegStorage::k32BitSolo, rARM_PC);
-const RegStorage rs_invalid(RegStorage::kInvalid);
+constexpr RegStorage rs_fr0(RegStorage::kValid | fr0);
+constexpr RegStorage rs_fr1(RegStorage::kValid | fr1);
+constexpr RegStorage rs_fr2(RegStorage::kValid | fr2);
+constexpr RegStorage rs_fr3(RegStorage::kValid | fr3);
+constexpr RegStorage rs_fr4(RegStorage::kValid | fr4);
+constexpr RegStorage rs_fr5(RegStorage::kValid | fr5);
+constexpr RegStorage rs_fr6(RegStorage::kValid | fr6);
+constexpr RegStorage rs_fr7(RegStorage::kValid | fr7);
+constexpr RegStorage rs_fr8(RegStorage::kValid | fr8);
+constexpr RegStorage rs_fr9(RegStorage::kValid | fr9);
+constexpr RegStorage rs_fr10(RegStorage::kValid | fr10);
+constexpr RegStorage rs_fr11(RegStorage::kValid | fr11);
+constexpr RegStorage rs_fr12(RegStorage::kValid | fr12);
+constexpr RegStorage rs_fr13(RegStorage::kValid | fr13);
+constexpr RegStorage rs_fr14(RegStorage::kValid | fr14);
+constexpr RegStorage rs_fr15(RegStorage::kValid | fr15);
+constexpr RegStorage rs_fr16(RegStorage::kValid | fr16);
+constexpr RegStorage rs_fr17(RegStorage::kValid | fr17);
+constexpr RegStorage rs_fr18(RegStorage::kValid | fr18);
+constexpr RegStorage rs_fr19(RegStorage::kValid | fr19);
+constexpr RegStorage rs_fr20(RegStorage::kValid | fr20);
+constexpr RegStorage rs_fr21(RegStorage::kValid | fr21);
+constexpr RegStorage rs_fr22(RegStorage::kValid | fr22);
+constexpr RegStorage rs_fr23(RegStorage::kValid | fr23);
+constexpr RegStorage rs_fr24(RegStorage::kValid | fr24);
+constexpr RegStorage rs_fr25(RegStorage::kValid | fr25);
+constexpr RegStorage rs_fr26(RegStorage::kValid | fr26);
+constexpr RegStorage rs_fr27(RegStorage::kValid | fr27);
+constexpr RegStorage rs_fr28(RegStorage::kValid | fr28);
+constexpr RegStorage rs_fr29(RegStorage::kValid | fr29);
+constexpr RegStorage rs_fr30(RegStorage::kValid | fr30);
+constexpr RegStorage rs_fr31(RegStorage::kValid | fr31);
 
-// Target-independent aliases.
-#define rARM_ARG0 r0
-#define rs_rARM_ARG0 rs_r0
-#define rARM_ARG1 r1
-#define rs_rARM_ARG1 rs_r1
-#define rARM_ARG2 r2
-#define rs_rARM_ARG2 rs_r2
-#define rARM_ARG3 r3
-#define rs_rARM_ARG3 rs_r3
-#define rARM_FARG0 r0
-#define rs_ARM_FARG0 rs_r0
-#define rARM_FARG1 r1
-#define rs_rARM_FARG1 rs_r1
-#define rARM_FARG2 r2
-#define rs_rARM_FARG2 rs_r2
-#define rARM_FARG3 r3
-#define rs_rARM_FARG3 rs_r3
-#define rARM_RET0 r0
-#define rs_rARM_RET0 rs_r0
-#define rARM_RET1 r1
-#define rs_rARM_RET1 rs_r1
-#define rARM_INVOKE_TGT rARM_LR
-#define rs_rARM_INVOKE_TGT rs_rARM_LR
-#define rARM_COUNT RegStorage::kInvalidRegVal
+constexpr RegStorage rs_dr0(RegStorage::kValid | dr0);
+constexpr RegStorage rs_dr1(RegStorage::kValid | dr1);
+constexpr RegStorage rs_dr2(RegStorage::kValid | dr2);
+constexpr RegStorage rs_dr3(RegStorage::kValid | dr3);
+constexpr RegStorage rs_dr4(RegStorage::kValid | dr4);
+constexpr RegStorage rs_dr5(RegStorage::kValid | dr5);
+constexpr RegStorage rs_dr6(RegStorage::kValid | dr6);
+constexpr RegStorage rs_dr7(RegStorage::kValid | dr7);
+constexpr RegStorage rs_dr8(RegStorage::kValid | dr8);
+constexpr RegStorage rs_dr9(RegStorage::kValid | dr9);
+constexpr RegStorage rs_dr10(RegStorage::kValid | dr10);
+constexpr RegStorage rs_dr11(RegStorage::kValid | dr11);
+constexpr RegStorage rs_dr12(RegStorage::kValid | dr12);
+constexpr RegStorage rs_dr13(RegStorage::kValid | dr13);
+constexpr RegStorage rs_dr14(RegStorage::kValid | dr14);
+constexpr RegStorage rs_dr15(RegStorage::kValid | dr15);
+#if 0
+constexpr RegStorage rs_dr16(RegStorage::kValid | dr16);
+constexpr RegStorage rs_dr17(RegStorage::kValid | dr17);
+constexpr RegStorage rs_dr18(RegStorage::kValid | dr18);
+constexpr RegStorage rs_dr19(RegStorage::kValid | dr19);
+constexpr RegStorage rs_dr20(RegStorage::kValid | dr20);
+constexpr RegStorage rs_dr21(RegStorage::kValid | dr21);
+constexpr RegStorage rs_dr22(RegStorage::kValid | dr22);
+constexpr RegStorage rs_dr23(RegStorage::kValid | dr23);
+constexpr RegStorage rs_dr24(RegStorage::kValid | dr24);
+constexpr RegStorage rs_dr25(RegStorage::kValid | dr25);
+constexpr RegStorage rs_dr26(RegStorage::kValid | dr26);
+constexpr RegStorage rs_dr27(RegStorage::kValid | dr27);
+constexpr RegStorage rs_dr28(RegStorage::kValid | dr28);
+constexpr RegStorage rs_dr29(RegStorage::kValid | dr29);
+constexpr RegStorage rs_dr30(RegStorage::kValid | dr30);
+constexpr RegStorage rs_dr31(RegStorage::kValid | dr31);
+#endif
 
 // RegisterLocation templates return values (r0, or r0/r1).
 const RegLocation arm_loc_c_return
-    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
+    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1,
      RegStorage(RegStorage::k32BitSolo, r0), INVALID_SREG, INVALID_SREG};
 const RegLocation arm_loc_c_return_wide
-    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
+    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
      RegStorage(RegStorage::k64BitPair, r0, r1), INVALID_SREG, INVALID_SREG};
 const RegLocation arm_loc_c_return_float
-    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
+    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1,
      RegStorage(RegStorage::k32BitSolo, r0), INVALID_SREG, INVALID_SREG};
 const RegLocation arm_loc_c_return_double
-    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
+    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
      RegStorage(RegStorage::k64BitPair, r0, r1), INVALID_SREG, INVALID_SREG};
 
 enum ArmShiftEncodings {
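A small sketch of what the RegStorage-based enumerators above buy: the storage shape and register class now travel with the register number, so the deleted ARM_FPREG/ARM_DOUBLEREG/ARM_FP_REG_MASK macros reduce to RegStorage queries. The helper behavior is inferred from how assemble_arm.cc uses these calls below and from the enumerator definitions; it is not verified against reg_storage.h.

// Sketch only: dr7 is declared above as k64BitSolo | kFloatingPoint | 7.
// Assumes ART's DCHECK macros are available.
void RegStorageEncodingSketch() {
  int raw = art::dr7;
  DCHECK(art::RegStorage::IsDouble(raw));      // was ARM_DOUBLEREG(x)
  DCHECK(!art::RegStorage::IsSingle(raw));     // was ARM_SINGLEREG(x)
  DCHECK_EQ(art::RegStorage::RegNum(raw), 7);  // was x & ARM_FP_REG_MASK
}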
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index f77b0a6..a895e6e 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -1137,24 +1137,25 @@
                 bits |= value;
                 break;
               case kFmtDfp: {
-                DCHECK(ARM_DOUBLEREG(operand));
-                DCHECK_EQ((operand & 0x1), 0U);
-                uint32_t reg_name = (operand & ARM_FP_REG_MASK) >> 1;
+                DCHECK(RegStorage::IsDouble(operand)) << ", Operand = 0x" << std::hex << operand;
+                uint32_t reg_num = RegStorage::RegNum(operand);
                 /* Snag the 1-bit slice and position it */
-                value = ((reg_name & 0x10) >> 4) << encoder->field_loc[i].end;
+                value = ((reg_num & 0x10) >> 4) << encoder->field_loc[i].end;
                 /* Extract and position the 4-bit slice */
-                value |= (reg_name & 0x0f) << encoder->field_loc[i].start;
+                value |= (reg_num & 0x0f) << encoder->field_loc[i].start;
                 bits |= value;
                 break;
               }
-              case kFmtSfp:
-                DCHECK(ARM_SINGLEREG(operand));
+              case kFmtSfp: {
+                DCHECK(RegStorage::IsSingle(operand)) << ", Operand = 0x" << std::hex << operand;
+                uint32_t reg_num = RegStorage::RegNum(operand);
                 /* Snag the 1-bit slice and position it */
-                value = (operand & 0x1) << encoder->field_loc[i].end;
+                value = (reg_num & 0x1) << encoder->field_loc[i].end;
                 /* Extract and position the 4-bit slice */
-                value |= ((operand & 0x1e) >> 1) << encoder->field_loc[i].start;
+                value |= ((reg_num & 0x1e) >> 1) << encoder->field_loc[i].start;
                 bits |= value;
                 break;
+              }
               case kFmtImm12:
               case kFmtModImm:
                 value = ((operand & 0x800) >> 11) << 26;
@@ -1212,13 +1213,13 @@
   cu_->NewTimingSplit("Assemble");
   int assembler_retries = 0;
   CodeOffset starting_offset = LinkFixupInsns(first_lir_insn_, last_lir_insn_, 0);
-  data_offset_ = (starting_offset + 0x3) & ~0x3;
+  data_offset_ = RoundUp(starting_offset, 4);
   int32_t offset_adjustment;
   AssignDataOffsets();
 
   /*
-   * Note: generation must be 1 on first pass (to distinguish from initialized state of 0 for non-visited nodes).
-   * Start at zero here, and bit will be flipped to 1 on entry to the loop.
+   * Note: generation must be 1 on first pass (to distinguish from initialized state of 0 for
+   * non-visited nodes).  Start at zero here, and bit will be flipped to 1 on entry to the loop.
    */
   int generation = 0;
   while (true) {
@@ -1244,7 +1245,7 @@
         case kFixupNone:
           break;
         case kFixupVLoad:
-          if (lir->operands[1] != r15pc) {
+          if (lir->operands[1] != rs_r15pc.GetReg()) {
             break;
           }
           // NOTE: intentional fallthrough.
@@ -1285,7 +1286,8 @@
              * happens.
              */
             int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) ||
-                            (lir->opcode == kThumb2LdrPcRel12)) ?  lir->operands[0] : rARM_LR;
+                            (lir->opcode == kThumb2LdrPcRel12)) ?  lir->operands[0] :
+                            rs_rARM_LR.GetReg();
 
             // Add new Adr to generate the address.
             LIR* new_adr = RawLIR(lir->dalvik_offset, kThumb2Adr,
@@ -1500,7 +1502,8 @@
           EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2]));
           LIR* target = lir->target;
           int32_t target_disp = (tab_rec != NULL) ?  tab_rec->offset + offset_adjustment
-              : target->offset + ((target->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+              : target->offset + ((target->flags.generation == lir->flags.generation) ? 0 :
+              offset_adjustment);
           int32_t disp = target_disp - ((lir->offset + 4) & ~3);
           if (disp < 4096) {
             lir->operands[1] = disp;
@@ -1533,12 +1536,12 @@
             prev_lir = new_mov16H;  // Now we've got a new prev.
 
             offset_adjustment -= lir->flags.size;
-            if (ARM_LOWREG(lir->operands[0])) {
+            if (RegStorage::RegNum(lir->operands[0]) < 8) {
               lir->opcode = kThumbAddRRLH;
             } else {
               lir->opcode = kThumbAddRRHH;
             }
-            lir->operands[1] = rARM_PC;
+            lir->operands[1] = rs_rARM_PC.GetReg();
             lir->flags.size = EncodingMap[lir->opcode].size;
             offset_adjustment += lir->flags.size;
             // Must stay in fixup list and have offset updated; will be used by LST/HSP pair.
@@ -1593,7 +1596,7 @@
         LOG(FATAL) << "Assembler error - too many retries";
       }
       starting_offset += offset_adjustment;
-      data_offset_ = (starting_offset + 0x3) & ~0x3;
+      data_offset_ = RoundUp(starting_offset, 4);
       AssignDataOffsets();
     }
   }
@@ -1606,7 +1609,7 @@
   write_pos = EncodeLIRs(write_pos, first_lir_insn_);
   DCHECK_EQ(static_cast<CodeOffset>(write_pos - &code_buffer_[0]), starting_offset);
 
-  DCHECK_EQ(data_offset_, (code_buffer_.size() + 0x3) & ~0x3);
+  DCHECK_EQ(data_offset_, RoundUp(code_buffer_.size(), 4));
 
   // Install literals
   InstallLiteralPools();
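The RoundUp substitutions in this file are behavior-preserving: for a power-of-two alignment the helper is equivalent to the old mask arithmetic. A standalone check of that identity (RoundUpSketch is a local stand-in, not ART's RoundUp):

#include <cassert>
#include <cstdint>

static uint32_t RoundUpSketch(uint32_t x, uint32_t n) {  // n must be a power of two
  return (x + n - 1) & ~(n - 1);
}

int main() {
  for (uint32_t x = 0; x < 64; ++x) {
    assert(RoundUpSketch(x, 4) == ((x + 0x3) & ~0x3u));  // matches the replaced expression
  }
  return 0;
}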
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 9cb56cf..5d74b8d 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -54,8 +54,7 @@
   tab_rec->table = table;
   tab_rec->vaddr = current_dalvik_offset_;
   uint32_t size = table[1];
-  tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*),
-                                                     kArenaAllocLIR));
+  tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
   switch_tables_.Insert(tab_rec);
 
   // Get the switch value
@@ -78,7 +77,7 @@
   // Establish loop branch target
   LIR* target = NewLIR0(kPseudoTargetLabel);
   // Load next key/disp
-  NewLIR2(kThumb2LdmiaWB, r_base.GetReg(), (1 << r_key.GetReg()) | (1 << r_disp.GetReg()));
+  NewLIR2(kThumb2LdmiaWB, r_base.GetReg(), (1 << r_key.GetRegNum()) | (1 << r_disp.GetRegNum()));
   OpRegReg(kOpCmp, r_key, rl_src.reg);
   // Go if match. NOTE: No instruction set switch here - must stay Thumb2
   LIR* it = OpIT(kCondEq, "");
@@ -168,7 +167,7 @@
   LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData).Int32Value(),
                rs_rARM_LR);
   // Materialize a pointer to the fill data image
-  NewLIR3(kThumb2Adr, r1, 0, WrapPointer(tab_rec));
+  NewLIR3(kThumb2Adr, rs_r1.GetReg(), 0, WrapPointer(tab_rec));
   ClobberCallerSave();
   LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
   MarkSafepointPC(call_inst);
@@ -195,10 +194,12 @@
       }
     }
     Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
-    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(),
+        mirror::Object::MonitorOffset().Int32Value() >> 2);
     MarkPossibleNullPointerException(opt_flags);
     LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL);
-    NewLIR4(kThumb2Strex, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    NewLIR4(kThumb2Strex, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(),
+        mirror::Object::MonitorOffset().Int32Value() >> 2);
     LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, NULL);
 
 
@@ -221,16 +222,19 @@
     // Explicit null-check as slow-path is entered using an IT.
     GenNullCheck(rs_r0, opt_flags);
     Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
-    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(),
+        mirror::Object::MonitorOffset().Int32Value() >> 2);
     MarkPossibleNullPointerException(opt_flags);
     OpRegImm(kOpCmp, rs_r1, 0);
     LIR* it = OpIT(kCondEq, "");
-    NewLIR4(kThumb2Strex/*eq*/, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    NewLIR4(kThumb2Strex/*eq*/, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(),
+        mirror::Object::MonitorOffset().Int32Value() >> 2);
     OpEndIT(it);
     OpRegImm(kOpCmp, rs_r1, 0);
     it = OpIT(kCondNe, "T");
     // Go expensive route - artLockObjectFromCode(self, obj);
-    LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(), rs_rARM_LR);
+    LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(),
+                       rs_rARM_LR);
     ClobberCallerSave();
     LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
     OpEndIT(it);
@@ -264,6 +268,7 @@
     MarkPossibleNullPointerException(opt_flags);
     LoadConstantNoClobber(rs_r3, 0);
     LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
+    GenMemBarrier(kStoreLoad);
     Store32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
     LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
 
@@ -281,7 +286,6 @@
 
     LIR* success_target = NewLIR0(kPseudoTargetLabel);
     unlock_success_branch->target = success_target;
-    GenMemBarrier(kStoreLoad);
   } else {
     // Explicit null-check as slow-path is entered using an IT.
     GenNullCheck(rs_r0, opt_flags);
@@ -291,7 +295,11 @@
     LoadConstantNoClobber(rs_r3, 0);
     // Is lock unheld on lock or held by us (==thread_id) on unlock?
     OpRegReg(kOpCmp, rs_r1, rs_r2);
+
     LIR* it = OpIT(kCondEq, "EE");
+    if (GenMemBarrier(kStoreLoad)) {
+      UpdateIT(it, "TEE");
+    }
     Store32Disp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
     // Go expensive route - UnlockObjectFromCode(obj);
     LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(),
@@ -300,7 +308,6 @@
     LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
     OpEndIT(it);
     MarkSafepointPC(call_inst);
-    GenMemBarrier(kStoreLoad);
   }
 }
 
@@ -339,10 +346,10 @@
    * expanding the frame or flushing.  This leaves the utility
    * code with a single temp: r12.  This should be enough.
    */
-  LockTemp(r0);
-  LockTemp(r1);
-  LockTemp(r2);
-  LockTemp(r3);
+  LockTemp(rs_r0);
+  LockTemp(rs_r1);
+  LockTemp(rs_r2);
+  LockTemp(rs_r3);
 
   /*
    * We can safely skip the stack overflow check if we're
@@ -352,10 +359,30 @@
                             (static_cast<size_t>(frame_size_) <
                             Thread::kStackOverflowReservedBytes));
   NewLIR0(kPseudoMethodEntry);
+  bool large_frame = (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes);
   if (!skip_overflow_check) {
     if (Runtime::Current()->ExplicitStackOverflowChecks()) {
-      /* Load stack limit */
-      Load32Disp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
+      if (!large_frame) {
+        /* Load stack limit */
+        LockTemp(rs_r12);
+        Load32Disp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
+      }
+    } else {
+      // Implicit stack overflow check.
+      // Generate a load from [sp, #-overflowsize].  If this is in the stack
+      // redzone we will get a segmentation fault.
+      //
+      // Caveat coder: if someone changes the kStackOverflowReservedBytes value
+      // we need to make sure that it's loadable in an immediate field of
+      // a sub instruction.  Otherwise we will get a temp allocation and the
+      // code size will increase.
+      //
+      // This is done before the callee save instructions to avoid any possibility
+      // of these overflowing.  This uses r12 and that's never saved in a callee
+      // save.
+      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
+      Load32Disp(rs_r12, 0, rs_r12);
+      MarkPossibleStackOverflowException();
     }
   }
   /* Spill core callee saves */
@@ -400,31 +427,32 @@
         const bool restore_lr_;
         const size_t sp_displace_;
       };
-      if (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes) {
+      if (large_frame) {
+        // Note: may need a temp reg, and we only have r12 free at this point.
         OpRegRegImm(kOpSub, rs_rARM_LR, rs_rARM_SP, frame_size_without_spills);
+        Load32Disp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
         LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_LR, rs_r12, nullptr);
         // Need to restore LR since we used it as a temp.
         AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true, spill_size));
         OpRegCopy(rs_rARM_SP, rs_rARM_LR);     // Establish stack
       } else {
-        // If the frame is small enough we are guaranteed to have enough space that remains to
-        // handle signals on the user stack.
+        /*
+         * If the frame is small enough we are guaranteed to have enough space that remains to
+         * handle signals on the user stack.  However, we may not have any free temp
+         * registers at this point, so we'll temporarily add LR to the temp pool.
+         */
+        DCHECK(!GetRegInfo(rs_rARM_LR)->IsTemp());
+        MarkTemp(rs_rARM_LR);
+        FreeTemp(rs_rARM_LR);
         OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills);
+        Clobber(rs_rARM_LR);
+        UnmarkTemp(rs_rARM_LR);
         LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr);
         AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, false, frame_size_));
       }
     } else {
-      // Implicit stack overflow check.
-      // Generate a load from [sp, #-overflowsize].  If this is in the stack
-      // redzone we will get a segmentation fault.
-      //
-      // Caveat coder: if someone changes the kStackOverflowReservedBytes value
-      // we need to make sure that it's loadable in an immediate field of
-      // a sub instruction.  Otherwise we will get a temp allocation and the
-      // code size will increase.
-      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
-      Load32Disp(rs_r12, 0, rs_r12);
-      MarkPossibleStackOverflowException();
+      // Implicit stack overflow check has already been done.  Just make room on the
+      // stack for the frame now.
       OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
     }
   } else {
@@ -433,10 +461,11 @@
 
   FlushIns(ArgLocs, rl_method);
 
-  FreeTemp(r0);
-  FreeTemp(r1);
-  FreeTemp(r2);
-  FreeTemp(r3);
+  FreeTemp(rs_r0);
+  FreeTemp(rs_r1);
+  FreeTemp(rs_r2);
+  FreeTemp(rs_r3);
+  FreeTemp(rs_r12);
 }
 
 void ArmMir2Lir::GenExitSequence() {
@@ -445,8 +474,8 @@
    * In the exit path, r0/r1 are live - make sure they aren't
    * allocated by the register utilities as temps.
    */
-  LockTemp(r0);
-  LockTemp(r1);
+  LockTemp(rs_r0);
+  LockTemp(rs_r1);
 
   NewLIR0(kPseudoMethodExit);
   OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4));
@@ -454,20 +483,20 @@
   if (num_fp_spills_) {
     NewLIR1(kThumb2VPopCS, num_fp_spills_);
   }
-  if (core_spill_mask_ & (1 << rARM_LR)) {
+  if (core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) {
     /* Unspill rARM_LR to rARM_PC */
-    core_spill_mask_ &= ~(1 << rARM_LR);
-    core_spill_mask_ |= (1 << rARM_PC);
+    core_spill_mask_ &= ~(1 << rs_rARM_LR.GetRegNum());
+    core_spill_mask_ |= (1 << rs_rARM_PC.GetRegNum());
   }
   NewLIR1(kThumb2Pop, core_spill_mask_);
-  if (!(core_spill_mask_ & (1 << rARM_PC))) {
+  if (!(core_spill_mask_ & (1 << rs_rARM_PC.GetRegNum()))) {
     /* We didn't pop to rARM_PC, so must do a bv rARM_LR */
-    NewLIR1(kThumbBx, rARM_LR);
+    NewLIR1(kThumbBx, rs_rARM_LR.GetReg());
   }
 }
 
 void ArmMir2Lir::GenSpecialExitSequence() {
-  NewLIR1(kThumbBx, rARM_LR);
+  NewLIR1(kThumbBx, rs_rARM_LR.GetReg());
 }
 
 }  // namespace art
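A compact sketch of the frame-entry decision the reorganized GenEntrySequence makes; the enum and helper below are illustrative stand-ins that mirror the control flow in the hunks above, not ART code.

#include <cstddef>

enum class OverflowCheckKind { kNone, kImplicitProbe, kExplicitSmall, kExplicitLarge };

// Mirrors the reorganized logic: leaf methods with small frames skip the check,
// implicit checking probes [sp, #-reserved] before the callee saves, and explicit
// checking only loads the stack limit early when the frame fits the usable region.
OverflowCheckKind ChooseOverflowCheck(size_t frame_size, bool is_leaf, bool explicit_checks,
                                      size_t reserved_bytes, size_t reserved_usable_bytes) {
  if (is_leaf && frame_size < reserved_bytes) {
    return OverflowCheckKind::kNone;
  }
  if (!explicit_checks) {
    return OverflowCheckKind::kImplicitProbe;
  }
  return (frame_size > reserved_usable_bytes) ? OverflowCheckKind::kExplicitLarge
                                              : OverflowCheckKind::kExplicitSmall;
}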
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 646859c..f0a9ca4 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -31,31 +31,29 @@
                             RegLocation rl_dest, int lit);
     bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
     LIR* CheckSuspendUsingLoad() OVERRIDE;
-    RegStorage LoadHelper(ThreadOffset<4> offset);
-    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
-                      int s_reg);
-    LIR* LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest, int s_reg);
+    RegStorage LoadHelper(ThreadOffset<4> offset) OVERRIDE;
+    RegStorage LoadHelper(ThreadOffset<8> offset) OVERRIDE;
+    LIR* LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest,
+                              OpSize size) OVERRIDE;
+    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+                      OpSize size) OVERRIDE;
     LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
-                         OpSize size);
+                         OpSize size) OVERRIDE;
     LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                             RegStorage r_dest, RegStorage r_dest_hi, OpSize size, int s_reg);
+                             RegStorage r_dest, OpSize size) OVERRIDE;
     LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
     LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
-    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
-    LIR* StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src);
+    LIR* StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src,
+                               OpSize size) OVERRIDE;
+    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+                       OpSize size) OVERRIDE;
     LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
-                          OpSize size);
+                          OpSize size) OVERRIDE;
     LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                              RegStorage r_src, RegStorage r_src_hi, OpSize size, int s_reg);
+                              RegStorage r_src, OpSize size) OVERRIDE;
     void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
 
     // Required for target - register utilities.
-    bool IsFpReg(int reg);
-    bool IsFpReg(RegStorage reg);
-    bool SameRegType(int reg1, int reg2);
-    RegStorage AllocTypedTemp(bool fp_hint, int reg_class);
-    RegStorage AllocTypedTempWide(bool fp_hint, int reg_class);
-    int S2d(int low_reg, int high_reg);
     RegStorage TargetReg(SpecialTargetRegister reg);
     RegStorage GetArgMappingToPhysicalReg(int arg_num);
     RegLocation GetReturnAlt();
@@ -64,17 +62,15 @@
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
-    uint32_t FpRegMask();
-    uint64_t GetRegMaskCommon(int reg);
+    uint64_t GetRegMaskCommon(RegStorage reg);
     void AdjustSpillMask();
     void ClobberCallerSave();
-    void FlushReg(RegStorage reg);
-    void FlushRegWide(RegStorage reg);
     void FreeCallTemps();
-    void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free);
     void LockCallTemps();
-    void MarkPreservedSingle(int v_reg, int reg);
+    void MarkPreservedSingle(int v_reg, RegStorage reg);
+    void MarkPreservedDouble(int v_reg, RegStorage reg);
     void CompilerInitializeRegAlloc();
+    RegStorage AllocPreservedDouble(int s_reg);
 
     // Required for target - miscellaneous.
     void AssembleLIR();
@@ -92,6 +88,11 @@
     int GetInsnSize(LIR* lir);
     bool IsUnconditionalBranch(LIR* lir);
 
+    // Check support for volatile load/store of a given size.
+    bool SupportsVolatileLoadStore(OpSize size) OVERRIDE;
+    // Get the register class for load/store of a field.
+    RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
+
     // Required for target - Dalvik-level generators.
     void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2);
@@ -119,6 +120,7 @@
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
+    void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
     void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                    RegLocation rl_src2);
@@ -126,6 +128,8 @@
                     RegLocation rl_src2);
     void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                     RegLocation rl_src2);
+    void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
+                       RegLocation rl_src2, bool is_div);
     RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
     RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
     void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
@@ -137,7 +141,7 @@
     void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
     void GenSelect(BasicBlock* bb, MIR* mir);
-    void GenMemBarrier(MemBarrierKind barrier_kind);
+    bool GenMemBarrier(MemBarrierKind barrier_kind);
     void GenMonitorEnter(int opt_flags, RegLocation rl_src);
     void GenMonitorExit(int opt_flags, RegLocation rl_src);
     void GenMoveException(RegLocation rl_dest);
@@ -156,6 +160,7 @@
     LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target);
     LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src);
     LIR* OpIT(ConditionCode cond, const char* guide);
+    void UpdateIT(LIR* it, const char* new_guide);
     void OpEndIT(LIR* it);
     LIR* OpMem(OpKind op, RegStorage r_base, int disp);
     LIR* OpPcRelLoad(RegStorage reg, LIR* target);
@@ -171,15 +176,16 @@
     LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value);
     LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2);
     LIR* OpTestSuspend(LIR* target);
-    LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset);
+    LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) OVERRIDE;
+    LIR* OpThreadMem(OpKind op, ThreadOffset<8> thread_offset) OVERRIDE;
     LIR* OpVldm(RegStorage r_base, int count);
     LIR* OpVstm(RegStorage r_base, int count);
     void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset);
     void OpRegCopyWide(RegStorage dest, RegStorage src);
-    void OpTlsCmp(ThreadOffset<4> offset, int val);
+    void OpTlsCmp(ThreadOffset<4> offset, int val) OVERRIDE;
+    void OpTlsCmp(ThreadOffset<8> offset, int val) OVERRIDE;
 
-    LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
-                          int s_reg);
+    LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size);
     LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
     LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
                           int shift);
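The new SupportsVolatileLoadStore / RegClassForFieldLoadStore / LoadBaseDispVolatile hooks declared above suggest a caller-side shape like the sketch below; the signatures come from the declarations in this header, but the control flow is an assumption, not taken from ART's field-access code.

// Hypothetical caller: picks the volatile-aware load when the target supports
// a volatile access of this width, otherwise falls back to the plain load.
void LoadFieldSketch(art::ArmMir2Lir* m2l, art::RegStorage base, int displacement,
                     art::RegStorage dest, art::OpSize size, bool is_volatile) {
  if (is_volatile && m2l->SupportsVolatileLoadStore(size)) {
    m2l->LoadBaseDispVolatile(base, displacement, dest, size);  // assumed to add needed barriers
  } else {
    m2l->LoadBaseDisp(base, displacement, dest, size);
  }
}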
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index d72f596..bb02f74 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -111,13 +111,11 @@
   rl_result = EvalLoc(rl_dest, kFPReg, true);
   DCHECK(rl_dest.wide);
   DCHECK(rl_result.wide);
-  NewLIR3(op, S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()), S2d(rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg()),
-          S2d(rl_src2.reg.GetLowReg(), rl_src2.reg.GetHighReg()));
+  NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
   StoreValueWide(rl_dest, rl_result);
 }
 
-void ArmMir2Lir::GenConversion(Instruction::Code opcode,
-                               RegLocation rl_dest, RegLocation rl_src) {
+void ArmMir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src) {
   int op = kThumbBkpt;
   int src_reg;
   RegLocation rl_result;
@@ -143,19 +141,16 @@
       break;
     case Instruction::LONG_TO_DOUBLE: {
       rl_src = LoadValueWide(rl_src, kFPReg);
-      src_reg = S2d(rl_src.reg.GetLowReg(), rl_src.reg.GetHighReg());
+      RegStorage src_low = rl_src.reg.DoubleToLowSingle();
+      RegStorage src_high = rl_src.reg.DoubleToHighSingle();
       rl_result = EvalLoc(rl_dest, kFPReg, true);
-      // TODO: fix AllocTempDouble to return a k64BitSolo double reg and lose the ARM_FP_DOUBLE.
       RegStorage tmp1 = AllocTempDouble();
       RegStorage tmp2 = AllocTempDouble();
 
-      // FIXME: needs 64-bit register cleanup.
-      NewLIR2(kThumb2VcvtF64S32, tmp1.GetLowReg() | ARM_FP_DOUBLE, (src_reg & ~ARM_FP_DOUBLE) + 1);
-      NewLIR2(kThumb2VcvtF64U32, S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()),
-              (src_reg & ~ARM_FP_DOUBLE));
+      NewLIR2(kThumb2VcvtF64S32, tmp1.GetReg(), src_high.GetReg());
+      NewLIR2(kThumb2VcvtF64U32, rl_result.reg.GetReg(), src_low.GetReg());
       LoadConstantWide(tmp2, 0x41f0000000000000LL);
-      NewLIR3(kThumb2VmlaF64, S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()),
-              tmp1.GetLowReg() | ARM_FP_DOUBLE, tmp2.GetLowReg() | ARM_FP_DOUBLE);
+      NewLIR3(kThumb2VmlaF64, rl_result.reg.GetReg(), tmp1.GetReg(), tmp2.GetReg());
       FreeTemp(tmp1);
       FreeTemp(tmp2);
       StoreValueWide(rl_dest, rl_result);
@@ -166,23 +161,20 @@
       return;
     case Instruction::LONG_TO_FLOAT: {
       rl_src = LoadValueWide(rl_src, kFPReg);
-      src_reg = S2d(rl_src.reg.GetLowReg(), rl_src.reg.GetHighReg());
+      RegStorage src_low = rl_src.reg.DoubleToLowSingle();
+      RegStorage src_high = rl_src.reg.DoubleToHighSingle();
       rl_result = EvalLoc(rl_dest, kFPReg, true);
       // Allocate temp registers.
       RegStorage high_val = AllocTempDouble();
       RegStorage low_val = AllocTempDouble();
       RegStorage const_val = AllocTempDouble();
       // Long to double.
-      NewLIR2(kThumb2VcvtF64S32, high_val.GetLowReg() | ARM_FP_DOUBLE,
-              (src_reg & ~ARM_FP_DOUBLE) + 1);
-      NewLIR2(kThumb2VcvtF64U32, low_val.GetLowReg() | ARM_FP_DOUBLE,
-              (src_reg & ~ARM_FP_DOUBLE));
+      NewLIR2(kThumb2VcvtF64S32, high_val.GetReg(), src_high.GetReg());
+      NewLIR2(kThumb2VcvtF64U32, low_val.GetReg(), src_low.GetReg());
       LoadConstantWide(const_val, INT64_C(0x41f0000000000000));
-      NewLIR3(kThumb2VmlaF64, low_val.GetLowReg() | ARM_FP_DOUBLE,
-              high_val.GetLowReg() | ARM_FP_DOUBLE,
-              const_val.GetLowReg() | ARM_FP_DOUBLE);
+      NewLIR3(kThumb2VmlaF64, low_val.GetReg(), high_val.GetReg(), const_val.GetReg());
       // Double to float.
-      NewLIR2(kThumb2VcvtDF, rl_result.reg.GetReg(), low_val.GetLowReg() | ARM_FP_DOUBLE);
+      NewLIR2(kThumb2VcvtDF, rl_result.reg.GetReg(), low_val.GetReg());
       // Free temp registers.
       FreeTemp(high_val);
       FreeTemp(low_val);
@@ -199,14 +191,14 @@
   }
   if (rl_src.wide) {
     rl_src = LoadValueWide(rl_src, kFPReg);
-    src_reg = S2d(rl_src.reg.GetLowReg(), rl_src.reg.GetHighReg());
+    src_reg = rl_src.reg.GetReg();
   } else {
     rl_src = LoadValue(rl_src, kFPReg);
     src_reg = rl_src.reg.GetReg();
   }
   if (rl_dest.wide) {
     rl_result = EvalLoc(rl_dest, kFPReg, true);
-    NewLIR2(op, S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()), src_reg);
+    NewLIR2(op, rl_result.reg.GetReg(), src_reg);
     StoreValueWide(rl_dest, rl_result);
   } else {
     rl_result = EvalLoc(rl_dest, kFPReg, true);
@@ -225,8 +217,7 @@
     rl_src2 = mir_graph_->GetSrcWide(mir, 2);
     rl_src1 = LoadValueWide(rl_src1, kFPReg);
     rl_src2 = LoadValueWide(rl_src2, kFPReg);
-    NewLIR2(kThumb2Vcmpd, S2d(rl_src1.reg.GetLowReg(), rl_src2.reg.GetHighReg()),
-            S2d(rl_src2.reg.GetLowReg(), rl_src2.reg.GetHighReg()));
+    NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
   } else {
     rl_src1 = mir_graph_->GetSrc(mir, 0);
     rl_src2 = mir_graph_->GetSrc(mir, 1);
@@ -300,8 +291,7 @@
     ClobberSReg(rl_dest.s_reg_low);
     rl_result = EvalLoc(rl_dest, kCoreReg, true);
     LoadConstant(rl_result.reg, default_result);
-    NewLIR2(kThumb2Vcmpd, S2d(rl_src1.reg.GetLowReg(), rl_src2.reg.GetHighReg()),
-            S2d(rl_src2.reg.GetLowReg(), rl_src2.reg.GetHighReg()));
+    NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
   } else {
     rl_src1 = LoadValue(rl_src1, kFPReg);
     rl_src2 = LoadValue(rl_src2, kFPReg);
@@ -311,7 +301,7 @@
     LoadConstant(rl_result.reg, default_result);
     NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
   }
-  DCHECK(!ARM_FPREG(rl_result.reg.GetReg()));
+  DCHECK(!rl_result.reg.IsFloat());
   NewLIR0(kThumb2Fmstat);
 
   LIR* it = OpIT((default_result == -1) ? kCondGt : kCondMi, "");
@@ -338,8 +328,7 @@
   RegLocation rl_result;
   rl_src = LoadValueWide(rl_src, kFPReg);
   rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR2(kThumb2Vnegd, S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()),
-          S2d(rl_src.reg.GetLowReg(), rl_src.reg.GetHighReg()));
+  NewLIR2(kThumb2Vnegd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
   StoreValueWide(rl_dest, rl_result);
 }
 
@@ -350,19 +339,16 @@
   RegLocation rl_dest = InlineTargetWide(info);  // double place for result
   rl_src = LoadValueWide(rl_src, kFPReg);
   RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
-  // TODO: shouldn't need S2d once 64bitSolo has proper double tag bit.
-  NewLIR2(kThumb2Vsqrtd, S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()),
-          S2d(rl_src.reg.GetLowReg(), rl_src.reg.GetHighReg()));
-  NewLIR2(kThumb2Vcmpd, S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()),
-          S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()));
+  NewLIR2(kThumb2Vsqrtd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  NewLIR2(kThumb2Vcmpd, rl_result.reg.GetReg(), rl_result.reg.GetReg());
   NewLIR0(kThumb2Fmstat);
   branch = NewLIR2(kThumbBCond, 0, kArmCondEq);
   ClobberCallerSave();
   LockCallTemps();  // Using fixed registers
   RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pSqrt));
-  NewLIR3(kThumb2Fmrrd, r0, r1, S2d(rl_src.reg.GetLowReg(), rl_src.reg.GetHighReg()));
+  NewLIR3(kThumb2Fmrrd, rs_r0.GetReg(), rs_r1.GetReg(), rl_src.reg.GetReg());
   NewLIR1(kThumbBlxR, r_tgt.GetReg());
-  NewLIR3(kThumb2Fmdrr, S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()), r0, r1);
+  NewLIR3(kThumb2Fmdrr, rl_result.reg.GetReg(), rs_r0.GetReg(), rs_r1.GetReg());
   branch->target = NewLIR0(kPseudoTargetLabel);
   StoreValueWide(rl_dest, rl_result);
   return true;
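The conversions above lean on DoubleToLowSingle/DoubleToHighSingle instead of the removed S2d macro. The sketch records the VFP aliasing those helpers are assumed to expose (double register d<n> overlays singles s<2n> and s<2n+1>), a property of the ARM VFP register file rather than of this change.

// Sketch only: single-precision register numbers aliased by double register d<n>.
inline int LowSingleOfDouble(int d_num)  { return 2 * d_num; }
inline int HighSingleOfDouble(int d_num) { return 2 * d_num + 1; }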
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index a2d6373..2556788 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -67,6 +67,34 @@
   return NewLIR2(kThumb2It, code, mask);
 }
 
+void ArmMir2Lir::UpdateIT(LIR* it, const char* new_guide) {
+  int mask;
+  int mask3 = 0;
+  int mask2 = 0;
+  int mask1 = 0;
+  ArmConditionCode code = static_cast<ArmConditionCode>(it->operands[0]);
+  int cond_bit = code & 1;
+  int alt_bit = cond_bit ^ 1;
+
+  // Note: case fallthroughs intentional
+  switch (strlen(new_guide)) {
+    case 3:
+      mask1 = (new_guide[2] == 'T') ? cond_bit : alt_bit;
+    case 2:
+      mask2 = (new_guide[1] == 'T') ? cond_bit : alt_bit;
+    case 1:
+      mask3 = (new_guide[0] == 'T') ? cond_bit : alt_bit;
+      break;
+    case 0:
+      break;
+    default:
+      LOG(FATAL) << "OAT: bad case in UpdateIT";
+  }
+  mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
+      (1 << (3 - strlen(new_guide)));
+  it->operands[1] = mask;
+}
+
 void ArmMir2Lir::OpEndIT(LIR* it) {
   // TODO: use the 'it' pointer to do some checks with the LIR, for example
   //       we could check that the number of instructions matches the mask
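A standalone re-derivation of the IT-mask arithmetic UpdateIT performs, handy for checking a guide by hand; ComputeItMaskSketch is a local stand-in that mirrors the function above, not ART code.

#include <cstring>

// cond_bit is the low bit of the block's first condition (ARM encodes EQ as 0, NE as 1, ...).
static int ComputeItMaskSketch(int cond_bit, const char* guide) {
  int alt_bit = cond_bit ^ 1;
  int len = static_cast<int>(std::strlen(guide));
  int mask1 = 0, mask2 = 0, mask3 = 0;
  switch (len) {  // fallthroughs intentional, as in UpdateIT
    case 3: mask1 = (guide[2] == 'T') ? cond_bit : alt_bit;  // FALLTHROUGH
    case 2: mask2 = (guide[1] == 'T') ? cond_bit : alt_bit;  // FALLTHROUGH
    case 1: mask3 = (guide[0] == 'T') ? cond_bit : alt_bit;  break;
    default: break;
  }
  return (mask3 << 3) | (mask2 << 2) | (mask1 << 1) | (1 << (3 - len));
}
// Example: GenMonitorExit widens an EQ block's guide from "EE" to "TEE";
// ComputeItMaskSketch(0, "TEE") yields 0b0111.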
@@ -256,10 +284,10 @@
     ccode = FlipComparisonOrder(ccode);
   }
   if (rl_src2.is_const) {
-    RegLocation rl_temp = UpdateLocWide(rl_src2);
+    rl_src2 = UpdateLocWide(rl_src2);
     // Do special compare/branch against simple const operand if not already in registers.
     int64_t val = mir_graph_->ConstantValueWide(rl_src2);
-    if ((rl_temp.location != kLocPhysReg) &&
+    if ((rl_src2.location != kLocPhysReg) &&
         ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) {
       GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
       return;
@@ -322,7 +350,7 @@
    */
   bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget));
   skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64);
-  if (!skip && (ARM_LOWREG(reg.GetReg())) && (check_value == 0) &&
+  if (!skip && reg.Low8() && (check_value == 0) &&
      ((arm_cond == kArmCondEq) || (arm_cond == kArmCondNe))) {
     branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
                      reg.GetReg(), 0);
@@ -344,13 +372,13 @@
   if (r_src.IsPair()) {
     r_src = r_src.GetLow();
   }
-  if (ARM_FPREG(r_dest.GetReg()) || ARM_FPREG(r_src.GetReg()))
+  if (r_dest.IsFloat() || r_src.IsFloat())
     return OpFpRegCopy(r_dest, r_src);
-  if (ARM_LOWREG(r_dest.GetReg()) && ARM_LOWREG(r_src.GetReg()))
+  if (r_dest.Low8() && r_src.Low8())
     opcode = kThumbMovRR;
-  else if (!ARM_LOWREG(r_dest.GetReg()) && !ARM_LOWREG(r_src.GetReg()))
+  else if (!r_dest.Low8() && !r_src.Low8())
      opcode = kThumbMovRR_H2H;
-  else if (ARM_LOWREG(r_dest.GetReg()))
+  else if (r_dest.Low8())
      opcode = kThumbMovRR_H2L;
   else
      opcode = kThumbMovRR_L2H;
@@ -370,21 +398,19 @@
 
 void ArmMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
   if (r_dest != r_src) {
-    bool dest_fp = ARM_FPREG(r_dest.GetLowReg());
-    bool src_fp = ARM_FPREG(r_src.GetLowReg());
+    bool dest_fp = r_dest.IsFloat();
+    bool src_fp = r_src.IsFloat();
+    DCHECK(r_dest.Is64Bit());
+    DCHECK(r_src.Is64Bit());
     if (dest_fp) {
       if (src_fp) {
-        // FIXME: handle 64-bit solo's here.
-        OpRegCopy(RegStorage::Solo64(S2d(r_dest.GetLowReg(), r_dest.GetHighReg())),
-                  RegStorage::Solo64(S2d(r_src.GetLowReg(), r_src.GetHighReg())));
+        OpRegCopy(r_dest, r_src);
       } else {
-        NewLIR3(kThumb2Fmdrr, S2d(r_dest.GetLowReg(), r_dest.GetHighReg()),
-                r_src.GetLowReg(), r_src.GetHighReg());
+        NewLIR3(kThumb2Fmdrr, r_dest.GetReg(), r_src.GetLowReg(), r_src.GetHighReg());
       }
     } else {
       if (src_fp) {
-        NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(), S2d(r_src.GetLowReg(),
-                r_src.GetHighReg()));
+        NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_src.GetReg());
       } else {
         // Handle overlap
         if (r_src.GetHighReg() == r_dest.GetLowReg()) {
@@ -694,7 +720,7 @@
   } else {
     DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
     // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
-    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
+    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size);
     StoreValue(rl_dest, rl_result);
   }
   return true;
@@ -727,6 +753,10 @@
   LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm";
 }
 
+void ArmMir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
+  UNIMPLEMENTED(FATAL) << "Should not be called.";
+}
+
 bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
   DCHECK_EQ(cu_->instruction_set, kThumb2);
   // Unused - RegLocation rl_src_unsafe = info->args[0];
@@ -747,16 +777,18 @@
   // around the potentially locked temp by using LR for r_ptr, unconditionally.
   // TODO: Pass information about the need for more temps to the stack frame generation
   // code so that we can rely on being able to allocate enough temps.
-  DCHECK(!reg_pool_->core_regs[rARM_LR].is_temp);
-  MarkTemp(rARM_LR);
-  FreeTemp(rARM_LR);
-  LockTemp(rARM_LR);
+  DCHECK(!GetRegInfo(rs_rARM_LR)->IsTemp());
+  MarkTemp(rs_rARM_LR);
+  FreeTemp(rs_rARM_LR);
+  LockTemp(rs_rARM_LR);
   bool load_early = true;
   if (is_long) {
-    int expected_reg = is_long ? rl_src_expected.reg.GetLowReg() : rl_src_expected.reg.GetReg();
-    int new_val_reg = is_long ? rl_src_new_value.reg.GetLowReg() : rl_src_new_value.reg.GetReg();
-    bool expected_is_core_reg = rl_src_expected.location == kLocPhysReg && !IsFpReg(expected_reg);
-    bool new_value_is_core_reg = rl_src_new_value.location == kLocPhysReg && !IsFpReg(new_val_reg);
+    RegStorage expected_reg = rl_src_expected.reg.IsPair() ? rl_src_expected.reg.GetLow() :
+        rl_src_expected.reg;
+    RegStorage new_val_reg = rl_src_new_value.reg.IsPair() ? rl_src_new_value.reg.GetLow() :
+        rl_src_new_value.reg;
+    bool expected_is_core_reg = rl_src_expected.location == kLocPhysReg && !expected_reg.IsFloat();
+    bool new_value_is_core_reg = rl_src_new_value.location == kLocPhysReg && !new_val_reg.IsFloat();
     bool expected_is_good_reg = expected_is_core_reg && !IsTemp(expected_reg);
     bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(new_val_reg);
 
@@ -802,9 +834,9 @@
 
   // Free now unneeded rl_object and rl_offset to give more temps.
   ClobberSReg(rl_object.s_reg_low);
-  FreeTemp(rl_object.reg.GetReg());
+  FreeTemp(rl_object.reg);
   ClobberSReg(rl_offset.s_reg_low);
-  FreeTemp(rl_offset.reg.GetReg());
+  FreeTemp(rl_offset.reg);
 
   RegLocation rl_expected;
   if (!is_long) {
@@ -813,9 +845,9 @@
     rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
   } else {
     // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs.
-    int low_reg = AllocTemp().GetReg();
-    int high_reg = AllocTemp().GetReg();
-    rl_new_value.reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg);
+    RegStorage low_reg = AllocTemp();
+    RegStorage high_reg = AllocTemp();
+    rl_new_value.reg = RegStorage::MakeRegPair(low_reg, high_reg);
     rl_expected = rl_new_value;
   }
 
@@ -840,7 +872,7 @@
       LoadValueDirectWide(rl_src_new_value, rl_new_value.reg);
     }
     // Make sure we use ORR that sets the ccode
-    if (ARM_LOWREG(r_tmp.GetReg()) && ARM_LOWREG(r_tmp_high.GetReg())) {
+    if (r_tmp.Low8() && r_tmp_high.Low8()) {
       NewLIR2(kThumbOrr, r_tmp.GetReg(), r_tmp_high.GetReg());
     } else {
       NewLIR4(kThumb2OrrRRRs, r_tmp.GetReg(), r_tmp.GetReg(), r_tmp_high.GetReg(), 0);
@@ -881,8 +913,8 @@
   StoreValue(rl_dest, rl_result);
 
   // Now, restore lr to its non-temp status.
-  Clobber(rARM_LR);
-  UnmarkTemp(rARM_LR);
+  Clobber(rs_rARM_LR);
+  UnmarkTemp(rs_rARM_LR);
   return true;
 }
 
@@ -891,11 +923,11 @@
 }
 
 LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) {
-  return NewLIR3(kThumb2Vldms, r_base.GetReg(), fr0, count);
+  return NewLIR3(kThumb2Vldms, r_base.GetReg(), rs_fr0.GetReg(), count);
 }
 
 LIR* ArmMir2Lir::OpVstm(RegStorage r_base, int count) {
-  return NewLIR3(kThumb2Vstms, r_base.GetReg(), fr0, count);
+  return NewLIR3(kThumb2Vstms, r_base.GetReg(), rs_fr0.GetReg(), count);
 }
 
 void ArmMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
@@ -918,7 +950,7 @@
 
 // Test suspend flag, return target of taken suspend branch
 LIR* ArmMir2Lir::OpTestSuspend(LIR* target) {
-  NewLIR2(kThumbSubRI8, rARM_SUSPEND, 1);
+  NewLIR2(kThumbSubRI8, rs_rARM_SUSPEND.GetReg(), 1);
   return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
 }
 
@@ -930,7 +962,7 @@
   return OpCondBranch(c_code, target);
 }
 
-void ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
+bool ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
 #if ANDROID_SMP != 0
   // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one.
   LIR* barrier = last_lir_insn_;
@@ -948,18 +980,33 @@
       break;
   }
 
+  bool ret = false;
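+  // Becomes true only if a new dmb is actually emitted below.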
+
   // If the same barrier already exists, don't generate another.
   if (barrier == nullptr
       || (barrier != nullptr && (barrier->opcode != kThumb2Dmb || barrier->operands[0] != dmb_flavor))) {
     barrier = NewLIR1(kThumb2Dmb, dmb_flavor);
+    ret = true;
   }
 
   // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
   DCHECK(!barrier->flags.use_def_invalid);
   barrier->u.m.def_mask = ENCODE_ALL;
+  return ret;
+#else
+  return false;
 #endif
 }
 
+void ArmMir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
+  LOG(FATAL) << "Unexpected use of GenNotLong()";
+}
+
+void ArmMir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
+                           RegLocation rl_src2, bool is_div) {
+  LOG(FATAL) << "Unexpected use of GenDivRemLong()";
+}
+
 void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
   rl_src = LoadValueWide(rl_src, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -1012,9 +1059,9 @@
     RegStorage res_lo;
     RegStorage res_hi;
     bool dest_promoted = rl_dest.location == kLocPhysReg && rl_dest.reg.Valid() &&
-        !IsTemp(rl_dest.reg.GetLowReg()) && !IsTemp(rl_dest.reg.GetHighReg());
-    bool src1_promoted = !IsTemp(rl_src1.reg.GetLowReg()) && !IsTemp(rl_src1.reg.GetHighReg());
-    bool src2_promoted = !IsTemp(rl_src2.reg.GetLowReg()) && !IsTemp(rl_src2.reg.GetHighReg());
+        !IsTemp(rl_dest.reg.GetLow()) && !IsTemp(rl_dest.reg.GetHigh());
+    bool src1_promoted = !IsTemp(rl_src1.reg.GetLow()) && !IsTemp(rl_src1.reg.GetHigh());
+    bool src2_promoted = !IsTemp(rl_src2.reg.GetLow()) && !IsTemp(rl_src2.reg.GetHigh());
     // Check if rl_dest is *not* either operand and we have enough temp registers.
     if ((rl_dest.s_reg_low != rl_src1.s_reg_low && rl_dest.s_reg_low != rl_src2.s_reg_low) &&
         (dest_promoted || src1_promoted || src2_promoted)) {
@@ -1036,10 +1083,10 @@
     }
 
     // Temporarily add LR to the temp pool, and assign it to tmp1
-    MarkTemp(rARM_LR);
-    FreeTemp(rARM_LR);
+    MarkTemp(rs_rARM_LR);
+    FreeTemp(rs_rARM_LR);
     RegStorage tmp1 = rs_rARM_LR;
-    LockTemp(rARM_LR);
+    LockTemp(rs_rARM_LR);
 
     if (rl_src1.reg == rl_src2.reg) {
       DCHECK(res_hi.Valid());
@@ -1054,7 +1101,9 @@
         DCHECK(!res_hi.Valid());
         DCHECK_NE(rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg());
         DCHECK_NE(rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
-        FreeTemp(rl_src1.reg.GetHighReg());
+        // Will force free src1_hi, so must clobber.
+        Clobber(rl_src1.reg);
+        FreeTemp(rl_src1.reg.GetHigh());
         res_hi = AllocTemp();
       }
       DCHECK(res_hi.Valid());
@@ -1065,16 +1114,14 @@
               tmp1.GetReg());
       NewLIR4(kThumb2AddRRR, res_hi.GetReg(), tmp1.GetReg(), res_hi.GetReg(), 0);
       if (reg_status == 2) {
-        // Clobber rl_src1 since it was corrupted.
-        FreeTemp(rl_src1.reg);
-        Clobber(rl_src1.reg);
+        FreeTemp(rl_src1.reg.GetLow());
       }
     }
 
     // Now, restore lr to its non-temp status.
     FreeTemp(tmp1);
-    Clobber(rARM_LR);
-    UnmarkTemp(rARM_LR);
+    Clobber(rs_rARM_LR);
+    UnmarkTemp(rs_rARM_LR);
 
     if (reg_status != 0) {
       // We had manually allocated registers for rl_result.
@@ -1116,7 +1163,7 @@
  */
 void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_dest, int scale) {
-  RegisterClass reg_class = oat_reg_class_by_size(size);
+  RegisterClass reg_class = RegClassBySize(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
   RegLocation rl_result;
@@ -1158,7 +1205,7 @@
       // No special indexed operation, lea + load w/ displacement
       reg_ptr = AllocTemp();
       OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
-      FreeTemp(rl_index.reg.GetReg());
+      FreeTemp(rl_index.reg);
     }
     rl_result = EvalLoc(rl_dest, reg_class, true);
 
@@ -1170,26 +1217,21 @@
       }
       FreeTemp(reg_len);
     }
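+    // The load below is shared by the wide and narrow cases; only the store-back differs.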
+    LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size);
+    MarkPossibleNullPointerException(opt_flags);
+    if (!constant_index) {
+      FreeTemp(reg_ptr);
+    }
     if (rl_dest.wide) {
-      LoadBaseDispWide(reg_ptr, data_offset, rl_result.reg, INVALID_SREG);
-      MarkPossibleNullPointerException(opt_flags);
-      if (!constant_index) {
-        FreeTemp(reg_ptr);
-      }
       StoreValueWide(rl_dest, rl_result);
     } else {
-      LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, INVALID_SREG);
-      MarkPossibleNullPointerException(opt_flags);
-      if (!constant_index) {
-        FreeTemp(reg_ptr);
-      }
       StoreValue(rl_dest, rl_result);
     }
   } else {
     // Offset base, then use indexed load
     RegStorage reg_ptr = AllocTemp();
     OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
-    FreeTemp(rl_array.reg.GetReg());
+    FreeTemp(rl_array.reg);
     rl_result = EvalLoc(rl_dest, reg_class, true);
 
     if (needs_range_check) {
@@ -1209,7 +1251,7 @@
  */
 void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
-  RegisterClass reg_class = oat_reg_class_by_size(size);
+  RegisterClass reg_class = RegClassBySize(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   bool constant_index = rl_index.is_const;
 
@@ -1234,8 +1276,8 @@
   bool allocated_reg_ptr_temp = false;
   if (constant_index) {
     reg_ptr = rl_array.reg;
-  } else if (IsTemp(rl_array.reg.GetReg()) && !card_mark) {
-    Clobber(rl_array.reg.GetReg());
+  } else if (IsTemp(rl_array.reg) && !card_mark) {
+    Clobber(rl_array.reg);
     reg_ptr = rl_array.reg;
   } else {
     allocated_reg_ptr_temp = true;
@@ -1275,11 +1317,7 @@
       FreeTemp(reg_len);
     }
 
-    if (rl_src.wide) {
-      StoreBaseDispWide(reg_ptr, data_offset, rl_src.reg);
-    } else {
-      StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size);
-    }
+    StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size);
     MarkPossibleNullPointerException(opt_flags);
   } else {
     /* reg_ptr -> array data */
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 305e89b..1520c52 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -25,16 +25,42 @@
 
 namespace art {
 
-static int core_regs[] = {r0, r1, r2, r3, rARM_SUSPEND, r5, r6, r7, r8, rARM_SELF, r10,
-                         r11, r12, rARM_SP, rARM_LR, rARM_PC};
-static int ReservedRegs[] = {rARM_SUSPEND, rARM_SELF, rARM_SP, rARM_LR, rARM_PC};
-static int FpRegs[] = {fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7,
-                       fr8, fr9, fr10, fr11, fr12, fr13, fr14, fr15,
-                       fr16, fr17, fr18, fr19, fr20, fr21, fr22, fr23,
-                       fr24, fr25, fr26, fr27, fr28, fr29, fr30, fr31};
-static int core_temps[] = {r0, r1, r2, r3, r12};
-static int fp_temps[] = {fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7,
-                        fr8, fr9, fr10, fr11, fr12, fr13, fr14, fr15};
+// TODO: rework this when c++11 support allows.
+static const RegStorage core_regs_arr[] =
+    {rs_r0, rs_r1, rs_r2, rs_r3, rs_rARM_SUSPEND, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF,
+     rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
+static const RegStorage sp_regs_arr[] =
+    {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10,
+     rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15, rs_fr16, rs_fr17, rs_fr18, rs_fr19, rs_fr20,
+     rs_fr21, rs_fr22, rs_fr23, rs_fr24, rs_fr25, rs_fr26, rs_fr27, rs_fr28, rs_fr29, rs_fr30,
+     rs_fr31};
+static const RegStorage dp_regs_arr[] =
+    {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, rs_dr8, rs_dr9, rs_dr10,
+     rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15};
+static const RegStorage reserved_regs_arr[] =
+    {rs_rARM_SUSPEND, rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
+static const RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r12};
+static const RegStorage sp_temps_arr[] =
+    {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10,
+     rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15};
+static const RegStorage dp_temps_arr[] =
+    {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7};
+
+static const std::vector<RegStorage> empty_pool;
+static const std::vector<RegStorage> core_regs(core_regs_arr,
+    core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0]));
+static const std::vector<RegStorage> sp_regs(sp_regs_arr,
+    sp_regs_arr + sizeof(sp_regs_arr) / sizeof(sp_regs_arr[0]));
+static const std::vector<RegStorage> dp_regs(dp_regs_arr,
+    dp_regs_arr + sizeof(dp_regs_arr) / sizeof(dp_regs_arr[0]));
+static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
+    reserved_regs_arr + sizeof(reserved_regs_arr) / sizeof(reserved_regs_arr[0]));
+static const std::vector<RegStorage> core_temps(core_temps_arr,
+    core_temps_arr + sizeof(core_temps_arr) / sizeof(core_temps_arr[0]));
+static const std::vector<RegStorage> sp_temps(sp_temps_arr,
+    sp_temps_arr + sizeof(sp_temps_arr) / sizeof(sp_temps_arr[0]));
+static const std::vector<RegStorage> dp_temps(dp_temps_arr,
+    dp_temps_arr + sizeof(dp_temps_arr) / sizeof(dp_temps_arr[0]));
 
 RegLocation ArmMir2Lir::LocCReturn() {
   return arm_loc_c_return;
@@ -54,74 +80,61 @@
 
 // Return a target-dependent special register.
 RegStorage ArmMir2Lir::TargetReg(SpecialTargetRegister reg) {
-  int res_reg = RegStorage::kInvalidRegVal;
+  RegStorage res_reg = RegStorage::InvalidReg();
   switch (reg) {
-    case kSelf: res_reg = rARM_SELF; break;
-    case kSuspend: res_reg =  rARM_SUSPEND; break;
-    case kLr: res_reg =  rARM_LR; break;
-    case kPc: res_reg =  rARM_PC; break;
-    case kSp: res_reg =  rARM_SP; break;
-    case kArg0: res_reg = rARM_ARG0; break;
-    case kArg1: res_reg = rARM_ARG1; break;
-    case kArg2: res_reg = rARM_ARG2; break;
-    case kArg3: res_reg = rARM_ARG3; break;
-    case kFArg0: res_reg = rARM_FARG0; break;
-    case kFArg1: res_reg = rARM_FARG1; break;
-    case kFArg2: res_reg = rARM_FARG2; break;
-    case kFArg3: res_reg = rARM_FARG3; break;
-    case kRet0: res_reg = rARM_RET0; break;
-    case kRet1: res_reg = rARM_RET1; break;
-    case kInvokeTgt: res_reg = rARM_INVOKE_TGT; break;
-    case kHiddenArg: res_reg = r12; break;
-    case kHiddenFpArg: res_reg = RegStorage::kInvalidRegVal; break;
-    case kCount: res_reg = rARM_COUNT; break;
+    case kSelf: res_reg = rs_rARM_SELF; break;
+    case kSuspend: res_reg =  rs_rARM_SUSPEND; break;
+    case kLr: res_reg =  rs_rARM_LR; break;
+    case kPc: res_reg =  rs_rARM_PC; break;
+    case kSp: res_reg =  rs_rARM_SP; break;
+    case kArg0: res_reg = rs_r0; break;
+    case kArg1: res_reg = rs_r1; break;
+    case kArg2: res_reg = rs_r2; break;
+    case kArg3: res_reg = rs_r3; break;
+    case kFArg0: res_reg = rs_r0; break;
+    case kFArg1: res_reg = rs_r1; break;
+    case kFArg2: res_reg = rs_r2; break;
+    case kFArg3: res_reg = rs_r3; break;
+    case kRet0: res_reg = rs_r0; break;
+    case kRet1: res_reg = rs_r1; break;
+    case kInvokeTgt: res_reg = rs_rARM_LR; break;
+    case kHiddenArg: res_reg = rs_r12; break;
+    case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
+    case kCount: res_reg = RegStorage::InvalidReg(); break;
   }
-  return RegStorage::Solo32(res_reg);
+  return res_reg;
 }
 
 RegStorage ArmMir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
   // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
   switch (arg_num) {
     case 0:
-      return rs_rARM_ARG1;
+      return rs_r1;
     case 1:
-      return rs_rARM_ARG2;
+      return rs_r2;
     case 2:
-      return rs_rARM_ARG3;
+      return rs_r3;
     default:
       return RegStorage::InvalidReg();
   }
 }
 
-// Create a double from a pair of singles.
-int ArmMir2Lir::S2d(int low_reg, int high_reg) {
-  return ARM_S2D(low_reg, high_reg);
-}
-
-// Return mask to strip off fp reg flags and bias.
-uint32_t ArmMir2Lir::FpRegMask() {
-  return ARM_FP_REG_MASK;
-}
-
-// True if both regs single, both core or both double.
-bool ArmMir2Lir::SameRegType(int reg1, int reg2) {
-  return (ARM_REGTYPE(reg1) == ARM_REGTYPE(reg2));
-}
-
 /*
  * Decode the register id.
  */
-uint64_t ArmMir2Lir::GetRegMaskCommon(int reg) {
+uint64_t ArmMir2Lir::GetRegMaskCommon(RegStorage reg) {
   uint64_t seed;
   int shift;
-  int reg_id;
-
-
-  reg_id = reg & 0x1f;
+  int reg_id = reg.GetRegNum();
   /* Each double register is equal to a pair of single-precision FP registers */
-  seed = ARM_DOUBLEREG(reg) ? 3 : 1;
+  if (reg.IsDouble()) {
+    seed = 0x3;
+    reg_id = reg_id << 1;
+  } else {
+    seed = 1;
+  }
   /* FP register starts at bit position 16 */
-  shift = ARM_FPREG(reg) ? kArmFPReg0 : 0;
+  shift = reg.IsFloat() ? kArmFPReg0 : 0;
   /* Expand the double register id into single offset */
   shift += reg_id;
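+  // E.g. single s1 yields one bit at (kArmFPReg0 + 1); double d1 yields two bits at
+  // (kArmFPReg0 + 2) and (kArmFPReg0 + 3).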
   return (seed << shift);
@@ -196,7 +209,7 @@
     }
     /* Fixup for kThumbPush/lr and kThumbPop/pc */
     if (opcode == kThumbPush || opcode == kThumbPop) {
-      uint64_t r8Mask = GetRegMaskCommon(r8);
+      uint64_t r8Mask = GetRegMaskCommon(rs_r8);
       if ((opcode == kThumbPush) && (lir->u.m.use_mask & r8Mask)) {
         lir->u.m.use_mask &= ~r8Mask;
         lir->u.m.use_mask |= ENCODE_ARM_REG_LR;
@@ -274,9 +287,9 @@
     if (vector & 0x1) {
       int reg_id = i;
       if (opcode == kThumbPush && i == 8) {
-        reg_id = r14lr;
+        reg_id = rs_rARM_LR.GetRegNum();
       } else if (opcode == kThumbPop && i == 8) {
-        reg_id = r15pc;
+        reg_id = rs_rARM_PC.GetRegNum();
       }
       if (printed) {
         snprintf(buf + strlen(buf), buf_size - strlen(buf), ", r%d", reg_id);
@@ -391,10 +404,10 @@
              snprintf(tbuf, arraysize(tbuf), "%d [%#x]", operand, operand);
              break;
            case 's':
-             snprintf(tbuf, arraysize(tbuf), "s%d", operand & ARM_FP_REG_MASK);
+             snprintf(tbuf, arraysize(tbuf), "s%d", RegStorage::RegNum(operand));
              break;
            case 'S':
-             snprintf(tbuf, arraysize(tbuf), "d%d", (operand & ARM_FP_REG_MASK) >> 1);
+             snprintf(tbuf, arraysize(tbuf), "d%d", RegStorage::RegNum(operand));
              break;
            case 'h':
              snprintf(tbuf, arraysize(tbuf), "%04x", operand);
@@ -404,6 +417,7 @@
              snprintf(tbuf, arraysize(tbuf), "%d", operand);
              break;
            case 'C':
+             operand = RegStorage::RegNum(operand);
              DCHECK_LT(operand, static_cast<int>(
                  sizeof(core_reg_names)/sizeof(core_reg_names[0])));
              snprintf(tbuf, arraysize(tbuf), "%s", core_reg_names[operand]);
@@ -509,6 +523,21 @@
   return ((lir->opcode == kThumbBUncond) || (lir->opcode == kThumb2BUncond));
 }
 
+bool ArmMir2Lir::SupportsVolatileLoadStore(OpSize size) {
+  return true;
+}
+
+RegisterClass ArmMir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
+  if (UNLIKELY(is_volatile)) {
+    // On arm, atomic 64-bit load/store requires a core register pair.
+    // Smaller aligned load/store is atomic for both core and fp registers.
+    if (size == k64 || size == kDouble) {
+      return kCoreReg;
+    }
+  }
+  return RegClassBySize(size);
+}
+
 ArmMir2Lir::ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
     : Mir2Lir(cu, mir_graph, arena) {
   // Sanity check - make sure encoding map lines up.
@@ -526,73 +555,44 @@
   return new ArmMir2Lir(cu, mir_graph, arena);
 }
 
-// Alloc a pair of core registers, or a double.
-RegStorage ArmMir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
-  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
-    return AllocTempDouble();
-  } else {
-    RegStorage low_reg = AllocTemp();
-    RegStorage high_reg = AllocTemp();
-    return RegStorage::MakeRegPair(low_reg, high_reg);
-  }
-}
-
-RegStorage ArmMir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
-  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg))
-    return AllocTempFloat();
-  return AllocTemp();
-}
-
 void ArmMir2Lir::CompilerInitializeRegAlloc() {
-  int num_regs = sizeof(core_regs)/sizeof(*core_regs);
-  int num_reserved = sizeof(ReservedRegs)/sizeof(*ReservedRegs);
-  int num_temps = sizeof(core_temps)/sizeof(*core_temps);
-  int num_fp_regs = sizeof(FpRegs)/sizeof(*FpRegs);
-  int num_fp_temps = sizeof(fp_temps)/sizeof(*fp_temps);
-  reg_pool_ = static_cast<RegisterPool*>(arena_->Alloc(sizeof(*reg_pool_),
-                                                       kArenaAllocRegAlloc));
-  reg_pool_->num_core_regs = num_regs;
-  reg_pool_->core_regs = reinterpret_cast<RegisterInfo*>
-      (arena_->Alloc(num_regs * sizeof(*reg_pool_->core_regs), kArenaAllocRegAlloc));
-  reg_pool_->num_fp_regs = num_fp_regs;
-  reg_pool_->FPRegs = static_cast<RegisterInfo*>
-      (arena_->Alloc(num_fp_regs * sizeof(*reg_pool_->FPRegs), kArenaAllocRegAlloc));
-  CompilerInitPool(reg_pool_->core_regs, core_regs, reg_pool_->num_core_regs);
-  CompilerInitPool(reg_pool_->FPRegs, FpRegs, reg_pool_->num_fp_regs);
+  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, empty_pool /* core64 */, sp_regs,
+                                        dp_regs, reserved_regs, empty_pool /* reserved64 */,
+                                        core_temps, empty_pool /* core64_temps */, sp_temps,
+                                        dp_temps);
 
-  // Keep special registers from being allocated
-  // Don't reserve the r4 if we are doing implicit suspend checks.
+  // Target-specific adjustments.
+
+  // Alias single precision floats to appropriate half of overlapping double.
+  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
+  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
+    int sp_reg_num = info->GetReg().GetRegNum();
+    int dp_reg_num = sp_reg_num >> 1;
+    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num);
+    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
+    // Double precision register's master storage should refer to itself.
+    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
+    // Redirect the single-precision register's master storage to the overlapping double.
+    info->SetMaster(dp_reg_info);
+    // Singles should show a single 32-bit mask bit, at first referring to the low half.
+    DCHECK_EQ(info->StorageMask(), 0x1U);
+    if (sp_reg_num & 1) {
+      // For odd singles, change to use the high word of the backing double.
+      info->SetStorageMask(0x2);
+    }
+  }
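+  // E.g. s5 now has d2's RegisterInfo as its master and storage mask 0x2 (the high half of d2),
+  // while s4 keeps mask 0x1 (the low half).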
+
   // TODO: re-enable this when we can safely save r4 over the suspension code path.
   bool no_suspend = NO_SUSPEND;  // || !Runtime::Current()->ExplicitSuspendChecks();
-  for (int i = 0; i < num_reserved; i++) {
-    if (no_suspend && (ReservedRegs[i] == rARM_SUSPEND)) {
-      // Don't reserve the suspend register.
-      continue;
-    }
-    MarkInUse(ReservedRegs[i]);
-  }
-  // Mark temp regs - all others not in use can be used for promotion
-  for (int i = 0; i < num_temps; i++) {
-    MarkTemp(core_temps[i]);
-  }
-  for (int i = 0; i < num_fp_temps; i++) {
-    MarkTemp(fp_temps[i]);
+  if (no_suspend) {
+    GetRegInfo(rs_rARM_SUSPEND)->MarkFree();
   }
 
-  // Start allocation at r2 in an attempt to avoid clobbering return values
-  reg_pool_->next_core_reg = r2;
-}
-
-void ArmMir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
-  DCHECK(rl_keep.wide);
-  DCHECK(rl_free.wide);
-  if ((rl_free.reg.GetLowReg() != rl_keep.reg.GetLowReg()) &&
-      (rl_free.reg.GetLowReg() != rl_keep.reg.GetHighReg()) &&
-      (rl_free.reg.GetHighReg() != rl_keep.reg.GetLowReg()) &&
-      (rl_free.reg.GetHighReg() != rl_keep.reg.GetHighReg())) {
-    // No overlap, free.
-    FreeTemp(rl_free.reg);
-  }
+  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
+  // TODO: adjust when we roll to hard float calling convention.
+  reg_pool_->next_core_reg_ = 2;
+  reg_pool_->next_sp_reg_ = 0;
+  reg_pool_->next_dp_reg_ = 0;
 }
 
 /*
@@ -602,7 +602,7 @@
  */
 
 void ArmMir2Lir::AdjustSpillMask() {
-  core_spill_mask_ |= (1 << rARM_LR);
+  core_spill_mask_ |= (1 << rs_rARM_LR.GetRegNum());
   num_core_spills_++;
 }
 
@@ -612,123 +612,99 @@
  * include any holes in the mask.  Associate holes with
  * Dalvik register INVALID_VREG (0xFFFFU).
  */
-void ArmMir2Lir::MarkPreservedSingle(int v_reg, int reg) {
-  DCHECK_GE(reg, ARM_FP_REG_MASK + ARM_FP_CALLEE_SAVE_BASE);
-  reg = (reg & ARM_FP_REG_MASK) - ARM_FP_CALLEE_SAVE_BASE;
+void ArmMir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
+  DCHECK_GE(reg.GetRegNum(), ARM_FP_CALLEE_SAVE_BASE);
+  int adjusted_reg_num = reg.GetRegNum() - ARM_FP_CALLEE_SAVE_BASE;
   // Ensure fp_vmap_table is large enough
   int table_size = fp_vmap_table_.size();
-  for (int i = table_size; i < (reg + 1); i++) {
+  for (int i = table_size; i < (adjusted_reg_num + 1); i++) {
     fp_vmap_table_.push_back(INVALID_VREG);
   }
   // Add the current mapping
-  fp_vmap_table_[reg] = v_reg;
+  fp_vmap_table_[adjusted_reg_num] = v_reg;
   // Size of fp_vmap_table is high-water mark, use to set mask
   num_fp_spills_ = fp_vmap_table_.size();
   fp_spill_mask_ = ((1 << num_fp_spills_) - 1) << ARM_FP_CALLEE_SAVE_BASE;
 }
 
-void ArmMir2Lir::FlushRegWide(RegStorage reg) {
-  RegisterInfo* info1 = GetRegInfo(reg.GetLowReg());
-  RegisterInfo* info2 = GetRegInfo(reg.GetHighReg());
-  DCHECK(info1 && info2 && info1->pair && info2->pair &&
-       (info1->partner == info2->reg) &&
-       (info2->partner == info1->reg));
-  if ((info1->live && info1->dirty) || (info2->live && info2->dirty)) {
-    if (!(info1->is_temp && info2->is_temp)) {
-      /* Should not happen.  If it does, there's a problem in eval_loc */
-      LOG(FATAL) << "Long half-temp, half-promoted";
-    }
-
-    info1->dirty = false;
-    info2->dirty = false;
-    if (mir_graph_->SRegToVReg(info2->s_reg) <
-      mir_graph_->SRegToVReg(info1->s_reg))
-      info1 = info2;
-    int v_reg = mir_graph_->SRegToVReg(info1->s_reg);
-    StoreBaseDispWide(rs_rARM_SP, VRegOffset(v_reg),
-                      RegStorage(RegStorage::k64BitPair, info1->reg, info1->partner));
-  }
-}
-
-void ArmMir2Lir::FlushReg(RegStorage reg) {
-  DCHECK(!reg.IsPair());
-  RegisterInfo* info = GetRegInfo(reg.GetReg());
-  if (info->live && info->dirty) {
-    info->dirty = false;
-    int v_reg = mir_graph_->SRegToVReg(info->s_reg);
-    StoreBaseDisp(rs_rARM_SP, VRegOffset(v_reg), reg, k32);
-  }
-}
-
-/* Give access to the target-dependent FP register encoding to common code */
-bool ArmMir2Lir::IsFpReg(int reg) {
-  return ARM_FPREG(reg);
-}
-
-bool ArmMir2Lir::IsFpReg(RegStorage reg) {
-  return IsFpReg(reg.IsPair() ? reg.GetLowReg() : reg.GetReg());
+void ArmMir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
+  // TEMP: perform as 2 singles.
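+  // E.g. preserving d8 records s16 for v_reg and s17 for v_reg + 1.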
+  int reg_num = reg.GetRegNum() << 1;
+  RegStorage lo = RegStorage::Solo32(RegStorage::kFloatingPoint | reg_num);
+  RegStorage hi = RegStorage::Solo32(RegStorage::kFloatingPoint | reg_num | 1);
+  MarkPreservedSingle(v_reg, lo);
+  MarkPreservedSingle(v_reg + 1, hi);
 }
 
 /* Clobber all regs that might be used by an external C call */
 void ArmMir2Lir::ClobberCallerSave() {
-  Clobber(r0);
-  Clobber(r1);
-  Clobber(r2);
-  Clobber(r3);
-  Clobber(r12);
-  Clobber(r14lr);
-  Clobber(fr0);
-  Clobber(fr1);
-  Clobber(fr2);
-  Clobber(fr3);
-  Clobber(fr4);
-  Clobber(fr5);
-  Clobber(fr6);
-  Clobber(fr7);
-  Clobber(fr8);
-  Clobber(fr9);
-  Clobber(fr10);
-  Clobber(fr11);
-  Clobber(fr12);
-  Clobber(fr13);
-  Clobber(fr14);
-  Clobber(fr15);
+  // TODO: rework this - it's gotten even more ugly.
+  Clobber(rs_r0);
+  Clobber(rs_r1);
+  Clobber(rs_r2);
+  Clobber(rs_r3);
+  Clobber(rs_r12);
+  Clobber(rs_r14lr);
+  Clobber(rs_fr0);
+  Clobber(rs_fr1);
+  Clobber(rs_fr2);
+  Clobber(rs_fr3);
+  Clobber(rs_fr4);
+  Clobber(rs_fr5);
+  Clobber(rs_fr6);
+  Clobber(rs_fr7);
+  Clobber(rs_fr8);
+  Clobber(rs_fr9);
+  Clobber(rs_fr10);
+  Clobber(rs_fr11);
+  Clobber(rs_fr12);
+  Clobber(rs_fr13);
+  Clobber(rs_fr14);
+  Clobber(rs_fr15);
+  Clobber(rs_dr0);
+  Clobber(rs_dr1);
+  Clobber(rs_dr2);
+  Clobber(rs_dr3);
+  Clobber(rs_dr4);
+  Clobber(rs_dr5);
+  Clobber(rs_dr6);
+  Clobber(rs_dr7);
 }
 
 RegLocation ArmMir2Lir::GetReturnWideAlt() {
   RegLocation res = LocCReturnWide();
-  res.reg.SetReg(r2);
-  res.reg.SetHighReg(r3);
-  Clobber(r2);
-  Clobber(r3);
-  MarkInUse(r2);
-  MarkInUse(r3);
-  MarkPair(res.reg.GetLowReg(), res.reg.GetHighReg());
+  res.reg.SetLowReg(rs_r2.GetReg());
+  res.reg.SetHighReg(rs_r3.GetReg());
+  Clobber(rs_r2);
+  Clobber(rs_r3);
+  MarkInUse(rs_r2);
+  MarkInUse(rs_r3);
+  MarkWide(res.reg);
   return res;
 }
 
 RegLocation ArmMir2Lir::GetReturnAlt() {
   RegLocation res = LocCReturn();
-  res.reg.SetReg(r1);
-  Clobber(r1);
-  MarkInUse(r1);
+  res.reg.SetReg(rs_r1.GetReg());
+  Clobber(rs_r1);
+  MarkInUse(rs_r1);
   return res;
 }
 
 /* To be used when explicitly managing register use */
 void ArmMir2Lir::LockCallTemps() {
-  LockTemp(r0);
-  LockTemp(r1);
-  LockTemp(r2);
-  LockTemp(r3);
+  LockTemp(rs_r0);
+  LockTemp(rs_r1);
+  LockTemp(rs_r2);
+  LockTemp(rs_r3);
 }
 
 /* To be used when explicitly managing register use */
 void ArmMir2Lir::FreeCallTemps() {
-  FreeTemp(r0);
-  FreeTemp(r1);
-  FreeTemp(r2);
-  FreeTemp(r3);
+  FreeTemp(rs_r0);
+  FreeTemp(rs_r1);
+  FreeTemp(rs_r2);
+  FreeTemp(rs_r3);
 }
 
 RegStorage ArmMir2Lir::LoadHelper(ThreadOffset<4> offset) {
@@ -736,6 +712,11 @@
   return rs_rARM_LR;
 }
 
+RegStorage ArmMir2Lir::LoadHelper(ThreadOffset<8> offset) {
+  UNIMPLEMENTED(FATAL) << "Should not be called.";
+  return RegStorage::InvalidReg();
+}
+
 LIR* ArmMir2Lir::CheckSuspendUsingLoad() {
   RegStorage tmp = rs_r0;
   Load32Disp(rs_rARM_SELF, Thread::ThreadSuspendTriggerOffset<4>().Int32Value(), tmp);
@@ -758,4 +739,59 @@
   return ArmMir2Lir::EncodingMap[opcode].fmt;
 }
 
+/*
+ * Somewhat messy code here.  We want to allocate a pair of contiguous
+ * physical single-precision floating point registers starting with
+ * an even numbered reg.  It is possible that the paired s_reg (s_reg+1)
+ * has already been allocated - try to fit if possible.  Fail to
+ * allocate if we can't meet the requirements for the pair of
+ * s_reg<=sX[even] & (s_reg+1)<= sX+1.
+ */
+// TODO: needs rewrite to support non-backed 64-bit float regs.
+RegStorage ArmMir2Lir::AllocPreservedDouble(int s_reg) {
+  RegStorage res;
+  int v_reg = mir_graph_->SRegToVReg(s_reg);
+  int p_map_idx = SRegToPMap(s_reg);
+  if (promotion_map_[p_map_idx+1].fp_location == kLocPhysReg) {
+    // Upper reg is already allocated.  Can we fit?
+    int high_reg = promotion_map_[p_map_idx+1].FpReg;
+    if ((high_reg & 1) == 0) {
+      // High reg is even - fail.
+      return res;  // Invalid.
+    }
+    // Is the low reg of the pair free?
+    // FIXME: rework.
+    RegisterInfo* p = GetRegInfo(RegStorage::FloatSolo32(high_reg - 1));
+    if (p->InUse() || p->IsTemp()) {
+      // Already allocated or not preserved - fail.
+      return res;  // Invalid.
+    }
+    // OK - good to go.
+    res = RegStorage::FloatSolo64(p->GetReg().GetRegNum() >> 1);
+    p->MarkInUse();
+    MarkPreservedSingle(v_reg, p->GetReg());
+  } else {
+    /*
+     * TODO: until runtime support is in, make sure we avoid promoting the same vreg to
+     * different underlying physical registers.
+     */
+    GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->dp_regs_);
+    for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
+      if (!info->IsTemp() && !info->InUse()) {
+        res = info->GetReg();
+        info->MarkInUse();
+        MarkPreservedDouble(v_reg, info->GetReg());
+        break;
+      }
+    }
+  }
+  if (res.Valid()) {
+    promotion_map_[p_map_idx].fp_location = kLocPhysReg;
+    promotion_map_[p_map_idx].FpReg = res.DoubleToLowSingle().GetReg();
+    promotion_map_[p_map_idx+1].fp_location = kLocPhysReg;
+    promotion_map_[p_map_idx+1].FpReg = res.DoubleToHighSingle().GetReg();
+  }
+  return res;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 2e64f74..86d32f4 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -69,7 +69,7 @@
 }
 
 LIR* ArmMir2Lir::LoadFPConstantValue(int r_dest, int value) {
-  DCHECK(ARM_SINGLEREG(r_dest));
+  DCHECK(RegStorage::IsSingle(r_dest));
   if (value == 0) {
     // TODO: we need better info about the target CPU.  a vector exclusive or
     //       would probably be better here if we could rely on its existance.
@@ -88,7 +88,7 @@
     data_target = AddWordData(&literal_list_, value);
   }
   LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kThumb2Vldrs,
-                          r_dest, r15pc, 0, 0, 0, data_target);
+                          r_dest, rs_r15pc.GetReg(), 0, 0, 0, data_target);
   SetMemRefType(load_pc_rel, true, kLiteral);
   AppendLIR(load_pc_rel);
   return load_pc_rel;
@@ -173,12 +173,12 @@
   LIR* res;
   int mod_imm;
 
-  if (ARM_FPREG(r_dest.GetReg())) {
+  if (r_dest.IsFloat()) {
     return LoadFPConstantValue(r_dest.GetReg(), value);
   }
 
   /* See if the value can be constructed cheaply */
-  if (ARM_LOWREG(r_dest.GetReg()) && (value >= 0) && (value <= 255)) {
+  if (r_dest.Low8() && (value >= 0) && (value <= 255)) {
     return NewLIR2(kThumbMovImm, r_dest.GetReg(), value);
   }
   /* Check Modified immediate special cases */
@@ -204,7 +204,7 @@
 }
 
 LIR* ArmMir2Lir::OpUnconditionalBranch(LIR* target) {
-  LIR* res = NewLIR1(kThumbBUncond, 0 /* offset to be patched  during assembly*/);
+  LIR* res = NewLIR1(kThumbBUncond, 0 /* offset to be patched during assembly */);
   res->target = target;
   return res;
 }
@@ -237,7 +237,7 @@
 LIR* ArmMir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2,
                                int shift) {
   bool thumb_form =
-      ((shift == 0) && ARM_LOWREG(r_dest_src1.GetReg()) && ARM_LOWREG(r_src2.GetReg()));
+      ((shift == 0) && r_dest_src1.Low8() && r_src2.Low8());
   ArmOpcode opcode = kThumbBkpt;
   switch (op) {
     case kOpAdc:
@@ -256,9 +256,9 @@
     case kOpCmp:
       if (thumb_form)
         opcode = kThumbCmpRR;
-      else if ((shift == 0) && !ARM_LOWREG(r_dest_src1.GetReg()) && !ARM_LOWREG(r_src2.GetReg()))
+      else if ((shift == 0) && !r_dest_src1.Low8() && !r_src2.Low8())
         opcode = kThumbCmpHH;
-      else if ((shift == 0) && ARM_LOWREG(r_dest_src1.GetReg()))
+      else if ((shift == 0) && r_dest_src1.Low8())
         opcode = kThumbCmpLH;
       else if (shift == 0)
         opcode = kThumbCmpHL;
@@ -270,11 +270,11 @@
       break;
     case kOpMov:
       DCHECK_EQ(shift, 0);
-      if (ARM_LOWREG(r_dest_src1.GetReg()) && ARM_LOWREG(r_src2.GetReg()))
+      if (r_dest_src1.Low8() && r_src2.Low8())
         opcode = kThumbMovRR;
-      else if (!ARM_LOWREG(r_dest_src1.GetReg()) && !ARM_LOWREG(r_src2.GetReg()))
+      else if (!r_dest_src1.Low8() && !r_src2.Low8())
         opcode = kThumbMovRR_H2H;
-      else if (ARM_LOWREG(r_dest_src1.GetReg()))
+      else if (r_dest_src1.Low8())
         opcode = kThumbMovRR_H2L;
       else
         opcode = kThumbMovRR_L2H;
@@ -389,8 +389,7 @@
 LIR* ArmMir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
                                   RegStorage r_src2, int shift) {
   ArmOpcode opcode = kThumbBkpt;
-  bool thumb_form = (shift == 0) && ARM_LOWREG(r_dest.GetReg()) && ARM_LOWREG(r_src1.GetReg()) &&
-      ARM_LOWREG(r_src2.GetReg());
+  bool thumb_form = (shift == 0) && r_dest.Low8() && r_src1.Low8() && r_src2.Low8();
   switch (op) {
     case kOpAdd:
       opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR;
@@ -466,7 +465,7 @@
   int32_t abs_value = (neg) ? -value : value;
   ArmOpcode opcode = kThumbBkpt;
   ArmOpcode alt_opcode = kThumbBkpt;
-  bool all_low_regs = (ARM_LOWREG(r_dest.GetReg()) && ARM_LOWREG(r_src1.GetReg()));
+  bool all_low_regs = r_dest.Low8() && r_src1.Low8();
   int32_t mod_imm = ModifiedImmediate(value);
 
   switch (op) {
@@ -488,10 +487,9 @@
     case kOpRor:
       return NewLIR3(kThumb2RorRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
     case kOpAdd:
-      if (ARM_LOWREG(r_dest.GetReg()) && (r_src1 == rs_r13sp) &&
-        (value <= 1020) && ((value & 0x3) == 0)) {
+      if (r_dest.Low8() && (r_src1 == rs_r13sp) && (value <= 1020) && ((value & 0x3) == 0)) {
         return NewLIR3(kThumbAddSpRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2);
-      } else if (ARM_LOWREG(r_dest.GetReg()) && (r_src1 == rs_r15pc) &&
+      } else if (r_dest.Low8() && (r_src1 == rs_r15pc) &&
           (value <= 1020) && ((value & 0x3) == 0)) {
         return NewLIR3(kThumbAddPcRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2);
       }
@@ -601,7 +599,7 @@
 LIR* ArmMir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
   bool neg = (value < 0);
   int32_t abs_value = (neg) ? -value : value;
-  bool short_form = (((abs_value & 0xff) == abs_value) && ARM_LOWREG(r_dest_src1.GetReg()));
+  bool short_form = (((abs_value & 0xff) == abs_value) && r_dest_src1.Low8());
   ArmOpcode opcode = kThumbBkpt;
   switch (op) {
     case kOpAdd:
@@ -643,22 +641,24 @@
   LIR* res = NULL;
   int32_t val_lo = Low32Bits(value);
   int32_t val_hi = High32Bits(value);
-  int target_reg = S2d(r_dest.GetLowReg(), r_dest.GetHighReg());
-  if (ARM_FPREG(r_dest.GetLowReg())) {
+  if (r_dest.IsFloat()) {
+    DCHECK(!r_dest.IsPair());
     if ((val_lo == 0) && (val_hi == 0)) {
       // TODO: we need better info about the target CPU.  a vector exclusive or
       //       would probably be better here if we could rely on its existance.
       // Load an immediate +2.0 (which encodes to 0)
-      NewLIR2(kThumb2Vmovd_IMM8, target_reg, 0);
+      NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), 0);
       // +0.0 = +2.0 - +2.0
-      res = NewLIR3(kThumb2Vsubd, target_reg, target_reg, target_reg);
+      res = NewLIR3(kThumb2Vsubd, r_dest.GetReg(), r_dest.GetReg(), r_dest.GetReg());
     } else {
       int encoded_imm = EncodeImmDouble(value);
       if (encoded_imm >= 0) {
-        res = NewLIR2(kThumb2Vmovd_IMM8, target_reg, encoded_imm);
+        res = NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), encoded_imm);
       }
     }
   } else {
+    // NOTE: Arm32 assumption here.
+    DCHECK(r_dest.IsPair());
     if ((InexpensiveConstantInt(val_lo) && (InexpensiveConstantInt(val_hi)))) {
       res = LoadConstantNoClobber(r_dest.GetLow(), val_lo);
       LoadConstantNoClobber(r_dest.GetHigh(), val_hi);
@@ -670,13 +670,13 @@
     if (data_target == NULL) {
       data_target = AddWideData(&literal_list_, val_lo, val_hi);
     }
-    if (ARM_FPREG(r_dest.GetLowReg())) {
+    if (r_dest.IsFloat()) {
       res = RawLIR(current_dalvik_offset_, kThumb2Vldrd,
-                   target_reg, r15pc, 0, 0, 0, data_target);
+                   r_dest.GetReg(), rs_r15pc.GetReg(), 0, 0, 0, data_target);
     } else {
       DCHECK(r_dest.IsPair());
       res = RawLIR(current_dalvik_offset_, kThumb2LdrdPcRel8,
-                   r_dest.GetLowReg(), r_dest.GetHighReg(), r15pc, 0, 0, data_target);
+                   r_dest.GetLowReg(), r_dest.GetHighReg(), rs_r15pc.GetReg(), 0, 0, data_target);
     }
     SetMemRefType(res, true, kLiteral);
     AppendLIR(res);
@@ -690,22 +690,20 @@
 
 LIR* ArmMir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
                                  int scale, OpSize size) {
-  bool all_low_regs = ARM_LOWREG(r_base.GetReg()) && ARM_LOWREG(r_index.GetReg()) &&
-      ARM_LOWREG(r_dest.GetReg());
+  bool all_low_regs = r_base.Low8() && r_index.Low8() && r_dest.Low8();
   LIR* load;
   ArmOpcode opcode = kThumbBkpt;
   bool thumb_form = (all_low_regs && (scale == 0));
   RegStorage reg_ptr;
 
-  if (ARM_FPREG(r_dest.GetReg())) {
-    if (ARM_SINGLEREG(r_dest.GetReg())) {
+  if (r_dest.IsFloat()) {
+    if (r_dest.IsSingle()) {
       DCHECK((size == k32) || (size == kSingle) || (size == kReference));
       opcode = kThumb2Vldrs;
       size = kSingle;
     } else {
-      DCHECK(ARM_DOUBLEREG(r_dest.GetReg()));
+      DCHECK(r_dest.IsDouble());
       DCHECK((size == k64) || (size == kDouble));
-      DCHECK_EQ((r_dest.GetReg() & 0x1), 0);
       opcode = kThumb2Vldrd;
       size = kDouble;
     }
@@ -758,20 +756,19 @@
 
 LIR* ArmMir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                                   int scale, OpSize size) {
-  bool all_low_regs = ARM_LOWREG(r_base.GetReg()) && ARM_LOWREG(r_index.GetReg()) &&
-      ARM_LOWREG(r_src.GetReg());
+  bool all_low_regs = r_base.Low8() && r_index.Low8() && r_src.Low8();
   LIR* store = NULL;
   ArmOpcode opcode = kThumbBkpt;
   bool thumb_form = (all_low_regs && (scale == 0));
   RegStorage reg_ptr;
 
-  if (ARM_FPREG(r_src.GetReg())) {
-    if (ARM_SINGLEREG(r_src.GetReg())) {
+  if (r_src.IsFloat()) {
+    if (r_src.IsSingle()) {
       DCHECK((size == k32) || (size == kSingle) || (size == kReference));
       opcode = kThumb2Vstrs;
       size = kSingle;
     } else {
-      DCHECK(ARM_DOUBLEREG(r_src.GetReg()));
+      DCHECK(r_src.IsDouble());
       DCHECK((size == k64) || (size == kDouble));
       DCHECK_EQ((r_src.GetReg() & 0x1), 0);
       opcode = kThumb2Vstrd;
@@ -828,49 +825,46 @@
  * performing null check, incoming MIR can be null.
  */
 LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
-                                  OpSize size, int s_reg) {
+                                  OpSize size) {
   LIR* load = NULL;
   ArmOpcode opcode = kThumbBkpt;
   bool short_form = false;
   bool thumb2Form = (displacement < 4092 && displacement >= 0);
-  bool all_low = r_dest.Is32Bit() && ARM_LOWREG(r_base.GetReg() && ARM_LOWREG(r_dest.GetReg()));
+  bool all_low = r_dest.Is32Bit() && r_base.Low8() && r_dest.Low8();
   int encoded_disp = displacement;
   bool already_generated = false;
-  int dest_low_reg = r_dest.IsPair() ? r_dest.GetLowReg() : r_dest.GetReg();
-  bool null_pointer_safepoint = false;
   switch (size) {
     case kDouble:
     // Intentional fall-though.
-    case k64:
-      if (ARM_FPREG(dest_low_reg)) {
-        // Note: following change to avoid using pairs for doubles, replace conversion w/ DCHECK.
-        if (r_dest.IsPair()) {
-          DCHECK(ARM_FPREG(r_dest.GetHighReg()));
-          r_dest = RegStorage::Solo64(S2d(r_dest.GetLowReg(), r_dest.GetHighReg()));
-        }
-        opcode = kThumb2Vldrd;
-        if (displacement <= 1020) {
-          short_form = true;
-          encoded_disp >>= 2;
-        }
-      } else {
-        if (displacement <= 1020) {
-          load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_base.GetReg(),
-                         displacement >> 2);
-        } else {
-          load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), k32, s_reg);
-          null_pointer_safepoint = true;
-          LoadBaseDispBody(r_base, displacement + 4, r_dest.GetHigh(), k32, INVALID_SREG);
-        }
-        already_generated = true;
+    case k64: {
+      DCHECK_EQ(displacement & 3, 0);
+      encoded_disp = (displacement & 1020) >> 2;  // Within range of kThumb2Vldrd/kThumb2LdrdI8.
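+      // E.g. displacement 0x1004 gives encoded_disp = (0x4 >> 2) = 1, with 0x1000 folded into
+      // r_ptr below.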
+      RegStorage r_ptr = r_base;
+      if ((displacement & ~1020) != 0) {
+        // For a core register load, use r_dest.GetLow() as the temporary pointer.
+        r_ptr = r_dest.IsFloat() ? AllocTemp() : r_dest.GetLow();
+        // Add displacement & ~1020 to the base; a single instruction covers offsets up to +-256KiB.
+        OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020);
       }
+      if (r_dest.IsFloat()) {
+        DCHECK(!r_dest.IsPair());
+        load = NewLIR3(kThumb2Vldrd, r_dest.GetReg(), r_ptr.GetReg(), encoded_disp);
+      } else {
+        load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg(),
+                       encoded_disp);
+      }
+      if ((displacement & ~1020) != 0 && r_dest.IsFloat()) {
+        FreeTemp(r_ptr);
+      }
+      already_generated = true;
       break;
+    }
     case kSingle:
     // Intentional fall-though.
     case k32:
     // Intentional fall-though.
     case kReference:
-      if (ARM_FPREG(r_dest.GetReg())) {
+      if (r_dest.IsFloat()) {
         opcode = kThumb2Vldrs;
         if (displacement <= 1020) {
           short_form = true;
@@ -878,13 +872,13 @@
         }
         break;
       }
-      if (ARM_LOWREG(r_dest.GetReg()) && (r_base.GetReg() == r15pc) &&
-          (displacement <= 1020) && (displacement >= 0)) {
+      if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) &&
+          (displacement >= 0)) {
         short_form = true;
         encoded_disp >>= 2;
         opcode = kThumbLdrPcRel;
-      } else if (ARM_LOWREG(r_dest.GetReg()) && (r_base.GetReg() == r13sp) &&
-          (displacement <= 1020) && (displacement >= 0)) {
+      } else if (r_dest.Low8() && (r_base == rs_rARM_SP) && (displacement <= 1020) &&
+                 (displacement >= 0)) {
         short_form = true;
         encoded_disp >>= 2;
         opcode = kThumbLdrSpRel;
@@ -940,10 +934,10 @@
     } else {
       RegStorage reg_offset = AllocTemp();
       LoadConstant(reg_offset, encoded_disp);
-      if (ARM_FPREG(dest_low_reg)) {
+      if (r_dest.IsFloat()) {
         // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
         OpRegReg(kOpAdd, reg_offset, r_base);
-        load = LoadBaseDispBody(reg_offset, 0, r_dest, size, s_reg);
+        load = LoadBaseDispBody(reg_offset, 0, r_dest, size);
       } else {
         load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
       }
@@ -954,28 +948,35 @@
   // TODO: in future may need to differentiate Dalvik accesses w/ spills
   if (r_base == rs_rARM_SP) {
     AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
-  } else {
-     // We might need to generate a safepoint if we have two store instructions (wide or double).
-     if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
-       MarkSafepointPC(load);
-     }
   }
   return load;
 }
 
-LIR* ArmMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
-                              int s_reg) {
-  DCHECK(!((size == k64) || (size == kDouble)));
+LIR* ArmMir2Lir::LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest,
+                                      OpSize size) {
+  // Only 64-bit load needs special handling.
+  if (UNLIKELY(size == k64 || size == kDouble)) {
+    DCHECK(!r_dest.IsFloat());  // See RegClassForFieldLoadStore().
+    // If the cpu supports LPAE, aligned LDRD is atomic - fall through to LoadBaseDisp().
+    if (!cu_->compiler_driver->GetInstructionSetFeatures().HasLpae()) {
+      // Use LDREXD for the atomic load. (Expect displacement > 0, don't optimize for == 0.)
+      RegStorage r_ptr = AllocTemp();
+      OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
+      LIR* lir = NewLIR3(kThumb2Ldrexd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg());
+      FreeTemp(r_ptr);
+      return lir;
+    }
+  }
+  return LoadBaseDisp(r_base, displacement, r_dest, size);
+}
+
+LIR* ArmMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+                              OpSize size) {
   // TODO: base this on target.
   if (size == kWord) {
     size = k32;
   }
-  return LoadBaseDispBody(r_base, displacement, r_dest, size, s_reg);
-}
-
-LIR* ArmMir2Lir::LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest,
-                                  int s_reg) {
-  return LoadBaseDispBody(r_base, displacement, r_dest, k64, s_reg);
+  return LoadBaseDispBody(r_base, displacement, r_dest, size);
 }
 
 
@@ -985,42 +986,41 @@
   ArmOpcode opcode = kThumbBkpt;
   bool short_form = false;
   bool thumb2Form = (displacement < 4092 && displacement >= 0);
-  bool all_low = r_src.Is32Bit() && (ARM_LOWREG(r_base.GetReg()) && ARM_LOWREG(r_src.GetReg()));
+  bool all_low = r_src.Is32Bit() && r_base.Low8() && r_src.Low8();
   int encoded_disp = displacement;
   bool already_generated = false;
-  int src_low_reg = r_src.IsPair() ? r_src.GetLowReg() : r_src.GetReg();
-  bool null_pointer_safepoint = false;
   switch (size) {
-    case k64:
     case kDouble:
-      if (!ARM_FPREG(src_low_reg)) {
-        if (displacement <= 1020) {
-          store = NewLIR4(kThumb2StrdI8, r_src.GetLowReg(), r_src.GetHighReg(), r_base.GetReg(),
-                          displacement >> 2);
-        } else {
-          store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), k32);
-          null_pointer_safepoint = true;
-          StoreBaseDispBody(r_base, displacement + 4, r_src.GetHigh(), k32);
-        }
-        already_generated = true;
-      } else {
-        // Note: following change to avoid using pairs for doubles, replace conversion w/ DCHECK.
-        if (r_src.IsPair()) {
-          DCHECK(ARM_FPREG(r_src.GetHighReg()));
-          r_src = RegStorage::Solo64(S2d(r_src.GetLowReg(), r_src.GetHighReg()));
-        }
-        opcode = kThumb2Vstrd;
-        if (displacement <= 1020) {
-          short_form = true;
-          encoded_disp >>= 2;
-        }
+    // Intentional fall-through.
+    case k64: {
+      DCHECK_EQ(displacement & 3, 0);
+      encoded_disp = (displacement & 1020) >> 2;  // Within range of kThumb2Vstrd/kThumb2StrdI8.
+      RegStorage r_ptr = r_base;
+      if ((displacement & ~1020) != 0) {
+        r_ptr = AllocTemp();
+        // Add displacement & ~1020 to the base; a single instruction covers offsets up to +-256KiB.
+        OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020);
       }
+      if (r_src.IsFloat()) {
+        DCHECK(!r_src.IsPair());
+        store = NewLIR3(kThumb2Vstrd, r_src.GetReg(), r_ptr.GetReg(), encoded_disp);
+      } else {
+        store = NewLIR4(kThumb2StrdI8, r_src.GetLowReg(), r_src.GetHighReg(), r_ptr.GetReg(),
+                        encoded_disp);
+      }
+      if ((displacement & ~1020) != 0) {
+        FreeTemp(r_ptr);
+      }
+      already_generated = true;
       break;
+    }
     case kSingle:
+    // Intentional fall-through.
     case k32:
+    // Intentional fall-through.
     case kReference:
-      if (ARM_FPREG(r_src.GetReg())) {
-        DCHECK(ARM_SINGLEREG(r_src.GetReg()));
+      if (r_src.IsFloat()) {
+        DCHECK(r_src.IsSingle());
         opcode = kThumb2Vstrs;
         if (displacement <= 1020) {
           short_form = true;
@@ -1028,8 +1028,7 @@
         }
         break;
       }
-      if (ARM_LOWREG(r_src.GetReg()) && (r_base == rs_r13sp) &&
-          (displacement <= 1020) && (displacement >= 0)) {
+      if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) {
         short_form = true;
         encoded_disp >>= 2;
         opcode = kThumbStrSpRel;
@@ -1074,7 +1073,7 @@
     } else {
       RegStorage r_scratch = AllocTemp();
       LoadConstant(r_scratch, encoded_disp);
-      if (ARM_FPREG(src_low_reg)) {
+      if (r_src.IsFloat()) {
         // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
         OpRegReg(kOpAdd, r_scratch, r_base);
         store = StoreBaseDispBody(r_scratch, 0, r_src, size);
@@ -1088,39 +1087,65 @@
   // TODO: In future, may need to differentiate Dalvik & spill accesses
   if (r_base == rs_rARM_SP) {
     AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
-  } else {
-    // We might need to generate a safepoint if we have two store instructions (wide or double).
-    if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
-      MarkSafepointPC(store);
-    }
   }
   return store;
 }
 
+LIR* ArmMir2Lir::StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src,
+                                       OpSize size) {
+  // Only 64-bit store needs special handling.
+  if (UNLIKELY(size == k64 || size == kDouble)) {
+    DCHECK(!r_src.IsFloat());  // See RegClassForFieldLoadStore().
+    // If the cpu supports LPAE, aligned STRD is atomic - fall through to StoreBaseDisp().
+    if (!cu_->compiler_driver->GetInstructionSetFeatures().HasLpae()) {
+      // Use STREXD for the atomic store. (Expect displacement > 0, don't optimize for == 0.)
+      RegStorage r_ptr = AllocTemp();
+      OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
+      LIR* fail_target = NewLIR0(kPseudoTargetLabel);
+      // We have only 5 temporary registers available and if r_base, r_src and r_ptr already
+      // take 4, we can't directly allocate 2 more for LDREXD temps. In that case clobber r_ptr
+      // in LDREXD and recalculate it from r_base.
+      RegStorage r_temp = AllocTemp();
+      RegStorage r_temp_high = AllocFreeTemp();  // We may not have another temp.
+      if (r_temp_high.Valid()) {
+        NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_temp_high.GetReg(), r_ptr.GetReg());
+        FreeTemp(r_temp_high);
+        FreeTemp(r_temp);
+      } else {
+        // If we don't have another temp, clobber r_ptr in LDREXD and reload it.
+        NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_ptr.GetReg(), r_ptr.GetReg());
+        FreeTemp(r_temp);  // May need the temp for kOpAdd.
+        OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
+      }
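+      // STREXD writes 0 to r_temp on success; the kCondNe branch below loops back to fail_target
+      // and retries until the store succeeds.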
+      LIR* lir = NewLIR4(kThumb2Strexd, r_temp.GetReg(), r_src.GetLowReg(), r_src.GetHighReg(),
+                         r_ptr.GetReg());
+      OpCmpImmBranch(kCondNe, r_temp, 0, fail_target);
+      FreeTemp(r_ptr);
+      return lir;
+    }
+  }
+  return StoreBaseDisp(r_base, displacement, r_src, size);
+}
+
 LIR* ArmMir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                OpSize size) {
   // TODO: base this on target.
   if (size == kWord) {
     size = k32;
   }
-  DCHECK(!((size == k64) || (size == kDouble)));
   return StoreBaseDispBody(r_base, displacement, r_src, size);
 }
 
-LIR* ArmMir2Lir::StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src) {
-  return StoreBaseDispBody(r_base, displacement, r_src, k64);
-}
-
 LIR* ArmMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
   int opcode;
-  DCHECK_EQ(ARM_DOUBLEREG(r_dest.GetReg()), ARM_DOUBLEREG(r_src.GetReg()));
-  if (ARM_DOUBLEREG(r_dest.GetReg())) {
+  DCHECK_EQ(r_dest.IsDouble(), r_src.IsDouble());
+  if (r_dest.IsDouble()) {
     opcode = kThumb2Vmovd;
   } else {
-    if (ARM_SINGLEREG(r_dest.GetReg())) {
-      opcode = ARM_SINGLEREG(r_src.GetReg()) ? kThumb2Vmovs : kThumb2Fmsr;
+    if (r_dest.IsSingle()) {
+      opcode = r_src.IsSingle() ? kThumb2Vmovs : kThumb2Fmsr;
     } else {
-      DCHECK(ARM_SINGLEREG(r_src.GetReg()));
+      DCHECK(r_src.IsSingle());
       opcode = kThumb2Fmrs;
     }
   }
@@ -1136,14 +1161,18 @@
   return NULL;
 }
 
+LIR* ArmMir2Lir::OpThreadMem(OpKind op, ThreadOffset<8> thread_offset) {
+  UNIMPLEMENTED(FATAL) << "Should not be called.";
+  return nullptr;
+}
+
 LIR* ArmMir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
   LOG(FATAL) << "Unexpected use of OpMem for Arm";
   return NULL;
 }
 
 LIR* ArmMir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                      int displacement, RegStorage r_src, RegStorage r_src_hi,
-                                      OpSize size, int s_reg) {
+                                      int displacement, RegStorage r_src, OpSize size) {
   LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for Arm";
   return NULL;
 }
@@ -1154,8 +1183,7 @@
 }
 
 LIR* ArmMir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                     int displacement, RegStorage r_dest, RegStorage r_dest_hi,
-                                     OpSize size, int s_reg) {
+                                     int displacement, RegStorage r_dest, OpSize size) {
   LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for Arm";
   return NULL;
 }
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
new file mode 100644
index 0000000..6a6b0f6
--- /dev/null
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -0,0 +1,438 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
+#define ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
+
+#include "dex/compiler_internals.h"
+
+namespace art {
+
+/*
+ * TODO(Arm64): the comments below are outdated.
+ *
+ * Runtime register usage conventions.
+ *
+ * r0-r3: Argument registers in both Dalvik and C/C++ conventions.
+ *        However, for Dalvik->Dalvik calls we'll pass the target's Method*
+ *        pointer in r0 as a hidden arg0. Otherwise used as codegen scratch
+ *        registers.
+ * r0-r1: As in C/C++ r0 is 32-bit return register and r0/r1 is 64-bit
+ * r4   : (rA64_SUSPEND) is reserved (suspend check/debugger assist)
+ * r5   : Callee save (promotion target)
+ * r6   : Callee save (promotion target)
+ * r7   : Callee save (promotion target)
+ * r8   : Callee save (promotion target)
+ * r9   : (rA64_SELF) is reserved (pointer to thread-local storage)
+ * r10  : Callee save (promotion target)
+ * r11  : Callee save (promotion target)
+ * r12  : Scratch, may be trashed by linkage stubs
+ * r13  : (sp) is reserved
+ * r14  : (lr) is reserved
+ * r15  : (pc) is reserved
+ *
+ * 5 core temps that codegen can use (r0, r1, r2, r3, r12)
+ * 7 core registers that can be used for promotion
+ *
+ * Floating point registers
+ * s0-s31
+ * d0-d15, where d0={s0,s1}, d1={s2,s3}, ... , d15={s30,s31}
+ *
+ * s16-s31 (d8-d15) preserved across C calls
+ * s0-s15 (d0-d7) trashed across C calls
+ *
+ * s0-s15/d0-d7 used as codegen temp/scratch
+ * s16-s31/d8-d15 can be used for promotion.
+ *
+ * Calling convention
+ *     o On a call to a Dalvik method, pass target's Method* in r0
+ *     o r1-r3 will be used for up to the first 3 words of arguments
+ *     o Arguments past the first 3 words will be placed in appropriate
+ *       out slots by the caller.
+ *     o If a 64-bit argument would span the register/memory argument
+ *       boundary, it will instead be fully passed in the frame.
+ *     o Maintain a 16-byte stack alignment
+ *
+ *  Stack frame diagram (stack grows down, higher addresses at top):
+ *
+ * +------------------------+
+ * | IN[ins-1]              |  {Note: resides in caller's frame}
+ * |       .                |
+ * | IN[0]                  |
+ * | caller's Method*       |
+ * +========================+  {Note: start of callee's frame}
+ * | spill region           |  {variable sized - will include lr if non-leaf.}
+ * +------------------------+
+ * | ...filler word...      |  {Note: used as 2nd word of V[locals-1] if long}
+ * +------------------------+
+ * | V[locals-1]            |
+ * | V[locals-2]            |
+ * |      .                 |
+ * |      .                 |
+ * | V[1]                   |
+ * | V[0]                   |
+ * +------------------------+
+ * |  0 to 3 words padding  |
+ * +------------------------+
+ * | OUT[outs-1]            |
+ * | OUT[outs-2]            |
+ * |       .                |
+ * | OUT[0]                 |
+ * | cur_method*            | <<== sp w/ 16-byte alignment
+ * +========================+
+ */
+
+// First FP callee save.
+#define A64_FP_CALLEE_SAVE_BASE 8
+
+// Temporary macros, used to mark code which wants to distinguish between zr/sp.
+#define A64_REG_IS_SP(reg_num) ((reg_num) == rwsp || (reg_num) == rsp)
+#define A64_REG_IS_ZR(reg_num) ((reg_num) == rwzr || (reg_num) == rxzr)
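+// For example, A64_REG_IS_ZR(rwzr) and A64_REG_IS_ZR(rxzr) hold, as do A64_REG_IS_SP(rwsp) and
+// A64_REG_IS_SP(rsp) (see the register pool below); both views encode as register number 31.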
+
+enum ArmResourceEncodingPos {
+  kArmGPReg0   = 0,
+  kArmRegLR    = 30,
+  kArmRegSP    = 31,
+  kArmFPReg0   = 32,
+  kArmRegEnd   = 64,
+};
+
+#define ENCODE_ARM_REG_SP           (1ULL << kArmRegSP)
+#define ENCODE_ARM_REG_LR           (1ULL << kArmRegLR)
+
+#define IS_SIGNED_IMM(size, value) \
+  ((value) >= -(1 << ((size) - 1)) && (value) < (1 << ((size) - 1)))
+#define IS_SIGNED_IMM7(value) IS_SIGNED_IMM(7, value)
+#define IS_SIGNED_IMM9(value) IS_SIGNED_IMM(9, value)
+#define IS_SIGNED_IMM12(value) IS_SIGNED_IMM(12, value)
+#define IS_SIGNED_IMM19(value) IS_SIGNED_IMM(19, value)
+#define IS_SIGNED_IMM21(value) IS_SIGNED_IMM(21, value)
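+// Illustrative: IS_SIGNED_IMM9() accepts the signed 9-bit range [-256, 255], so
+// IS_SIGNED_IMM9(255) and IS_SIGNED_IMM9(-256) hold while IS_SIGNED_IMM9(256) does not.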
+
+// Quick macro used to define the registers.
+#define A64_REGISTER_CODE_LIST(R) \
+  R(0)  R(1)  R(2)  R(3)  R(4)  R(5)  R(6)  R(7) \
+  R(8)  R(9)  R(10) R(11) R(12) R(13) R(14) R(15) \
+  R(16) R(17) R(18) R(19) R(20) R(21) R(22) R(23) \
+  R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31)
+
+// Registers (integer) values.
+enum A64NativeRegisterPool {
+#  define A64_DEFINE_REGISTERS(nr) \
+    rw##nr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | nr, \
+    rx##nr = RegStorage::k64BitSolo | RegStorage::kCoreRegister | nr, \
+    rf##nr = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | nr, \
+    rd##nr = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | nr,
+  A64_REGISTER_CODE_LIST(A64_DEFINE_REGISTERS)
+#undef A64_DEFINE_REGISTERS
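+  // Illustrative expansion for nr == 0: rw0 (32-bit core), rx0 (64-bit core), rf0 (single
+  // precision FP) and rd0 (double precision FP), all views of register number 0.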
+
+  rwzr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0x3f,
+  rxzr = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 0x3f,
+  rwsp = rw31,
+  rsp = rx31,
+  rA64_SUSPEND = rx19,
+  rA64_SELF = rx18,
+  rA64_SP = rx31,
+  rA64_LR = rx30,
+  /*
+   * FIXME: It's a bit awkward to define both 32 and 64-bit views of these - we'll only ever use
+   * the 64-bit view. However, for now we'll define a 32-bit view to keep these from being
+   * allocated as 32-bit temp registers.
+   */
+  rA32_SUSPEND = rw19,
+  rA32_SELF = rw18,
+  rA32_SP = rw31,
+  rA32_LR = rw30
+};
+
+#define A64_DEFINE_REGSTORAGES(nr) \
+  constexpr RegStorage rs_w##nr(RegStorage::kValid | rw##nr); \
+  constexpr RegStorage rs_x##nr(RegStorage::kValid | rx##nr); \
+  constexpr RegStorage rs_f##nr(RegStorage::kValid | rf##nr); \
+  constexpr RegStorage rs_d##nr(RegStorage::kValid | rd##nr);
+A64_REGISTER_CODE_LIST(A64_DEFINE_REGSTORAGES)
+#undef A64_DEFINE_REGSTORAGES
+
+constexpr RegStorage rs_wzr(RegStorage::kValid | rwzr);
+constexpr RegStorage rs_xzr(RegStorage::kValid | rxzr);
+constexpr RegStorage rs_rA64_SUSPEND(RegStorage::kValid | rA64_SUSPEND);
+constexpr RegStorage rs_rA64_SELF(RegStorage::kValid | rA64_SELF);
+constexpr RegStorage rs_rA64_SP(RegStorage::kValid | rA64_SP);
+constexpr RegStorage rs_rA64_LR(RegStorage::kValid | rA64_LR);
+// TODO: eliminate the need for these.
+constexpr RegStorage rs_rA32_SUSPEND(RegStorage::kValid | rA32_SUSPEND);
+constexpr RegStorage rs_rA32_SELF(RegStorage::kValid | rA32_SELF);
+constexpr RegStorage rs_rA32_SP(RegStorage::kValid | rA32_SP);
+constexpr RegStorage rs_rA32_LR(RegStorage::kValid | rA32_LR);
+
+// RegisterLocation templates return values (following the hard-float calling convention).
+const RegLocation arm_loc_c_return =
+    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_w0, INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_wide =
+    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_x0, INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_float =
+    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_f0, INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_double =
+    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_d0, INVALID_SREG, INVALID_SREG};
+
+/**
+ * @brief Shift-type to be applied to a register via EncodeShift().
+ */
+enum A64ShiftEncodings {
+  kA64Lsl = 0x0,
+  kA64Lsr = 0x1,
+  kA64Asr = 0x2,
+  kA64Ror = 0x3
+};
+
+/**
+ * @brief Extend-type to be applied to a register via EncodeExtend().
+ */
+enum A64RegExtEncodings {
+  kA64Uxtb = 0x0,
+  kA64Uxth = 0x1,
+  kA64Uxtw = 0x2,
+  kA64Uxtx = 0x3,
+  kA64Sxtb = 0x4,
+  kA64Sxth = 0x5,
+  kA64Sxtw = 0x6,
+  kA64Sxtx = 0x7
+};
+
+#define ENCODE_NO_SHIFT (EncodeShift(kA64Lsl, 0))
+
+/*
+ * The following enum defines the list of supported A64 instructions by the
+ * assembler. Their corresponding EncodingMap positions will be defined in
+ * assemble_arm64.cc.
+ */
+enum ArmOpcode {
+  kA64First = 0,
+  kA64Adc3rrr = kA64First,  // adc [00011010000] rm[20-16] [000000] rn[9-5] rd[4-0].
+  kA64Add4RRdT,      // add [s001000100] imm_12[21-10] rn[9-5] rd[4-0].
+  kA64Add4rrro,      // add [00001011000] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
+  kA64Adr2xd,        // adr [0] immlo[30-29] [10000] immhi[23-5] rd[4-0].
+  kA64And3Rrl,       // and [00010010] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64And4rrro,      // and [00001010] shift[23-22] [N=0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+  kA64Asr3rrd,       // asr [0001001100] immr[21-16] imms[15-10] rn[9-5] rd[4-0].
+  kA64Asr3rrr,       // asr alias of "sbfm arg0, arg1, arg2, {#31/#63}".
+  kA64B2ct,          // b.cond [01010100] imm_19[23-5] [0] cond[3-0].
+  kA64Blr1x,         // blr [1101011000111111000000] rn[9-5] [00000].
+  kA64Br1x,          // br  [1101011000011111000000] rn[9-5] [00000].
+  kA64Brk1d,         // brk [11010100001] imm_16[20-5] [00000].
+  kA64B1t,           // b   [00010100] offset_26[25-0].
+  kA64Cbnz2rt,       // cbnz[00110101] imm_19[23-5] rt[4-0].
+  kA64Cbz2rt,        // cbz [00110100] imm_19[23-5] rt[4-0].
+  kA64Cmn3rro,       // cmn [s0101011] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] [11111].
+  kA64Cmn3Rre,       // cmn [s0101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
+  kA64Cmn3RdT,       // cmn [00110001] shift[23-22] imm_12[21-10] rn[9-5] [11111].
+  kA64Cmp3rro,       // cmp [s1101011] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] [11111].
+  kA64Cmp3Rre,       // cmp [s1101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
+  kA64Cmp3RdT,       // cmp [01110001] shift[23-22] imm_12[21-10] rn[9-5] [11111].
+  kA64Csel4rrrc,     // csel[s0011010100] rm[20-16] cond[15-12] [00] rn[9-5] rd[4-0].
+  kA64Csinc4rrrc,    // csinc [s0011010100] rm[20-16] cond[15-12] [01] rn[9-5] rd[4-0].
+  kA64Csneg4rrrc,    // csneg [s1011010100] rm[20-16] cond[15-12] [01] rn[9-5] rd[4-0].
+  kA64Dmb1B,         // dmb [11010101000000110011] CRm[11-8] [10111111].
+  kA64Eor3Rrl,       // eor [s10100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64Eor4rrro,      // eor [s1001010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+  kA64Extr4rrrd,     // extr[s00100111N0] rm[20-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64Fabs2ff,       // fabs[000111100s100000110000] rn[9-5] rd[4-0].
+  kA64Fadd3fff,      // fadd[000111100s1] rm[20-16] [001010] rn[9-5] rd[4-0].
+  kA64Fcmp1f,        // fcmp[000111100s100000001000] rn[9-5] [01000].
+  kA64Fcmp2ff,       // fcmp[000111100s1] rm[20-16] [001000] rn[9-5] [00000].
+  kA64Fcvtzs2wf,     // fcvtzs [000111100s111000000000] rn[9-5] rd[4-0].
+  kA64Fcvtzs2xf,     // fcvtzs [100111100s111000000000] rn[9-5] rd[4-0].
+  kA64Fcvt2Ss,       // fcvt   [0001111000100010110000] rn[9-5] rd[4-0].
+  kA64Fcvt2sS,       // fcvt   [0001111001100010010000] rn[9-5] rd[4-0].
+  kA64Fdiv3fff,      // fdiv[000111100s1] rm[20-16] [000110] rn[9-5] rd[4-0].
+  kA64Fmov2ff,       // fmov[000111100s100000010000] rn[9-5] rd[4-0].
+  kA64Fmov2fI,       // fmov[000111100s1] imm_8[20-13] [10000000] rd[4-0].
+  kA64Fmov2sw,       // fmov[0001111000100111000000] rn[9-5] rd[4-0].
+  kA64Fmov2Sx,       // fmov[1001111001100111000000] rn[9-5] rd[4-0].
+  kA64Fmov2ws,       // fmov[0001111001101110000000] rn[9-5] rd[4-0].
+  kA64Fmov2xS,       // fmov[1001111001101111000000] rn[9-5] rd[4-0].
+  kA64Fmul3fff,      // fmul[000111100s1] rm[20-16] [000010] rn[9-5] rd[4-0].
+  kA64Fneg2ff,       // fneg[000111100s100001010000] rn[9-5] rd[4-0].
+  kA64Frintz2ff,     // frintz [000111100s100101110000] rn[9-5] rd[4-0].
+  kA64Fsqrt2ff,      // fsqrt[000111100s100001110000] rn[9-5] rd[4-0].
+  kA64Fsub3fff,      // fsub[000111100s1] rm[20-16] [001110] rn[9-5] rd[4-0].
+  kA64Ldrb3wXd,      // ldrb[0011100101] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldrb3wXx,      // ldrb[00111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Ldrsb3rXd,     // ldrsb[001110011s] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldrsb3rXx,     // ldrsb[0011 1000 1s1] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Ldrh3wXF,      // ldrh[0111100101] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldrh4wXxd,     // ldrh[01111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Ldrsh3rXF,     // ldrsh[011110011s] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldrsh4rXxd,    // ldrsh[011110001s1] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0]
+  kA64Ldr2fp,        // ldr [0s011100] imm_19[23-5] rt[4-0].
+  kA64Ldr2rp,        // ldr [0s011000] imm_19[23-5] rt[4-0].
+  kA64Ldr3fXD,       // ldr [1s11110100] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldr3rXD,       // ldr [1s111000010] imm_9[20-12] [01] rn[9-5] rt[4-0].
+  kA64Ldr4fXxG,      // ldr [1s111100011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Ldr4rXxG,      // ldr [1s111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64LdrPost3rXd,   // ldr [1s111000010] imm_9[20-12] [01] rn[9-5] rt[4-0].
+  kA64Ldp4ffXD,      // ldp [0s10110101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64Ldp4rrXD,      // ldp [s010100101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64LdpPost4rrXD,  // ldp [s010100011] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64Ldur3fXd,      // ldur[1s111100010] imm_9[20-12] [00] rn[9-5] rt[4-0].
+  kA64Ldur3rXd,      // ldur[1s111000010] imm_9[20-12] [00] rn[9-5] rt[4-0].
+  kA64Ldxr2rX,       // ldxr[1s00100001011111011111] rn[9-5] rt[4-0].
+  kA64Lsl3rrr,       // lsl [s0011010110] rm[20-16] [001000] rn[9-5] rd[4-0].
+  kA64Lsr3rrd,       // lsr alias of "ubfm arg0, arg1, arg2, #{31/63}".
+  kA64Lsr3rrr,       // lsr [s0011010110] rm[20-16] [001001] rn[9-5] rd[4-0].
+  kA64Movk3rdM,      // mov [010100101] hw[22-21] imm_16[20-5] rd[4-0].
+  kA64Movn3rdM,      // mov [000100101] hw[22-21] imm_16[20-5] rd[4-0].
+  kA64Movz3rdM,      // mov [011100101] hw[22-21] imm_16[20-5] rd[4-0].
+  kA64Mov2rr,        // mov [00101010000] rm[20-16] [000000] [11111] rd[4-0].
+  kA64Mvn2rr,        // mov [00101010001] rm[20-16] [000000] [11111] rd[4-0].
+  kA64Mul3rrr,       // mul [00011011000] rm[20-16] [011111] rn[9-5] rd[4-0].
+  kA64Msub4rrrr,     // msub[s0011011000] rm[20-16] [1] ra[14-10] rn[9-5] rd[4-0].
+  kA64Neg3rro,       // neg alias of "sub arg0, rzr, arg1, arg2".
+  kA64Orr3Rrl,       // orr [s01100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64Orr4rrro,      // orr [s0101010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+  kA64Ret,           // ret [11010110010111110000001111000000].
+  kA64Rev2rr,        // rev [s10110101100000000001x] rn[9-5] rd[4-0].
+  kA64Rev162rr,      // rev16[s101101011000000000001] rn[9-5] rd[4-0].
+  kA64Ror3rrr,       // ror [s0011010110] rm[20-16] [001011] rn[9-5] rd[4-0].
+  kA64Sbc3rrr,       // sbc [s0011010000] rm[20-16] [000000] rn[9-5] rd[4-0].
+  kA64Sbfm4rrdd,     // sbfm[0001001100] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64Scvtf2fw,      // scvtf  [000111100s100010000000] rn[9-5] rd[4-0].
+  kA64Scvtf2fx,      // scvtf  [100111100s100010000000] rn[9-5] rd[4-0].
+  kA64Sdiv3rrr,      // sdiv[s0011010110] rm[20-16] [000011] rn[9-5] rd[4-0].
+  kA64Smaddl4xwwx,   // smaddl [10011011001] rm[20-16] [0] ra[14-10] rn[9-5] rd[4-0].
+  kA64Stp4ffXD,      // stp [0s10110100] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64Stp4rrXD,      // stp [s010100100] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64StpPost4rrXD,  // stp [s010100010] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64StpPre4rrXD,   // stp [s010100110] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+  kA64Str3fXD,       // str [1s11110100] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Str4fXxG,      // str [1s111100001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Str3rXD,       // str [1s11100100] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Str4rXxG,      // str [1s111000001] rm[20-16] option[15-13] S[12-12] [10] rn[9-5] rt[4-0].
+  kA64Strb3wXd,      // strb[0011100100] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Strb3wXx,      // strb[00111000001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64Strh3wXF,      // strh[0111100100] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Strh4wXxd,     // strh[01111000001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+  kA64StrPost3rXd,   // str [1s111000000] imm_9[20-12] [01] rn[9-5] rt[4-0].
+  kA64Stur3fXd,      // stur[1s111100000] imm_9[20-12] [00] rn[9-5] rt[4-0].
+  kA64Stur3rXd,      // stur[1s111000000] imm_9[20-12] [00] rn[9-5] rt[4-0].
+  kA64Stxr3wrX,      // stxr[11001000000] rs[20-16] [011111] rn[9-5] rt[4-0].
+  kA64Sub4RRdT,      // sub [s101000100] imm_12[21-10] rn[9-5] rd[4-0].
+  kA64Sub4rrro,      // sub [s1001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
+  kA64Subs3rRd,      // subs[s111000100] imm_12[21-10] rn[9-5] rd[4-0].
+  kA64Tst3rro,       // tst alias of "ands rzr, arg1, arg2, arg3".
+  kA64Ubfm4rrdd,     // ubfm[s10100110] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+  kA64Last,
+  kA64NotWide = 0,   // Flag used to select the first instruction variant.
+  kA64Wide = 0x1000  // Flag used to select the second instruction variant.
+};
+
+/*
+ * The A64 instruction set provides two variants for many instructions. For example, "mov wN, wM"
+ * and "mov xN, xM" or - for floating point instructions - "mov sN, sM" and "mov dN, dM".
+ * It definitely makes sense to exploit these symmetries of the instruction set. We do this via the
+ * WIDE, UNWIDE macros. For opcodes that allow it, the wide variant can be obtained by applying the
+ * WIDE macro to the non-wide opcode. E.g. WIDE(kA64Sub4RRdT).
+ */
+
+// Return the wide and no-wide variants of the given opcode.
+#define WIDE(op) ((ArmOpcode)((op) | kA64Wide))
+#define UNWIDE(op) ((ArmOpcode)((op) & ~kA64Wide))
+
+// Whether the given opcode is wide.
+#define IS_WIDE(op) (((op) & kA64Wide) != 0)
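+// Illustrative: WIDE(kA64Add4RRdT) selects the 64-bit "add" variant, IS_WIDE(WIDE(op)) always
+// holds, and UNWIDE(WIDE(op)) == op for every opcode in the enum above.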
+
+/*
+ * Floating point variants. These are just aliases of the macros above which we use for floating
+ * point instructions, just for readability reasons.
+ * TODO(Arm64): should we remove these and use the original macros?
+ */
+#define FWIDE WIDE
+#define FUNWIDE UNWIDE
+#define IS_FWIDE IS_WIDE
+
+enum ArmOpDmbOptions {
+  kSY = 0xf,
+  kST = 0xe,
+  kISH = 0xb,
+  kISHST = 0xa,
+  kNSH = 0x7,
+  kNSHST = 0x6
+};
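+// Illustrative: kISH (0xb) is the CRm option encoded by kA64Dmb1B for an inner-shareable
+// barrier, printed as "dmb ish".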
+
+// Instruction assembly field_loc kind.
+enum ArmEncodingKind {
+  // All the formats below are encoded in the same way (as a kFmtBitBlt).
+  // These are grouped together, for fast handling (e.g. "if (LIKELY(fmt <= kFmtBitBlt)) ...").
+  kFmtRegW = 0,  // Word register (w) or wzr.
+  kFmtRegX,      // Extended word register (x) or xzr.
+  kFmtRegR,      // Register with same width as the instruction or zr.
+  kFmtRegWOrSp,  // Word register (w) or wsp.
+  kFmtRegXOrSp,  // Extended word register (x) or sp.
+  kFmtRegROrSp,  // Register with same width as the instruction or sp.
+  kFmtRegS,      // Single FP reg.
+  kFmtRegD,      // Double FP reg.
+  kFmtRegF,      // Single/double FP reg depending on the instruction width.
+  kFmtBitBlt,    // Bit string using end/start.
+
+  // Less likely formats.
+  kFmtUnused,    // Unused field and marks end of formats.
+  kFmtImm21,     // Sign-extended immediate using [23..5,30..29].
+  kFmtShift,     // Register shift, 9-bit at [23..21, 15..10].
+  kFmtExtend,    // Register extend, 9-bit at [23..21, 15..10].
+  kFmtSkip,      // Unused field, but continue to next.
+};
+
+// TODO(Arm64): should we get rid of kFmtExtend?
+//   Note: the only instructions that use it (cmp, cmn) are not used themselves.
+
+// Struct used to define the snippet positions for each A64 opcode.
+struct ArmEncodingMap {
+  uint32_t wskeleton;
+  uint32_t xskeleton;
+  struct {
+    ArmEncodingKind kind;
+    int end;         // end for kFmtBitBlt, 1-bit slice end for FP regs.
+    int start;       // start for kFmtBitBlt, 4-bit slice end for FP regs.
+  } field_loc[4];
+  ArmOpcode opcode;  // can be WIDE()-ned to indicate it has a wide variant.
+  uint64_t flags;
+  const char* name;
+  const char* fmt;
+  int size;          // Note: size is in bytes.
+  FixupKind fixup;
+};
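+// Illustrative: each ENCODING_MAP(...) entry in assemble_arm64.cc fills one ArmEncodingMap in the
+// order above, with wskeleton/xskeleton holding the 32-bit and 64-bit instruction skeletons.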
+
+#if 0
+// TODO(Arm64): try the following alternative, which fits exactly in one cache line (64 bytes).
+struct ArmEncodingMap {
+  uint32_t wskeleton;
+  uint32_t xskeleton;
+  uint64_t flags;
+  const char* name;
+  const char* fmt;
+  struct {
+    uint8_t kind;
+    int8_t end;         // end for kFmtBitBlt, 1-bit slice end for FP regs.
+    int8_t start;       // start for kFmtBitBlt, 4-bit slice end for FP regs.
+  } field_loc[4];
+  uint32_t fixup;
+  uint32_t opcode;         // can be WIDE()-ned to indicate it has a wide variant.
+  uint32_t padding[3];
+};
+#endif
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
new file mode 100644
index 0000000..4a0c055
--- /dev/null
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -0,0 +1,933 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm64_lir.h"
+#include "codegen_arm64.h"
+#include "dex/quick/mir_to_lir-inl.h"
+
+namespace art {
+
+// The macros below are exclusively used in the encoding map.
+
+// Most generic way of providing two variants for one instruction.
+#define CUSTOM_VARIANTS(variant1, variant2) variant1, variant2
+
+// Used for instructions which do not have a wide variant.
+#define NO_VARIANTS(variant) \
+  CUSTOM_VARIANTS(variant, 0)
+
+// Used for instructions which have a wide variant with the sf bit set to 1.
+#define SF_VARIANTS(sf0_skeleton) \
+  CUSTOM_VARIANTS(sf0_skeleton, (sf0_skeleton | 0x80000000))
+
+// Used for instructions which have a wide variant with the size bits set to either x0 or x1.
+#define SIZE_VARIANTS(sizex0_skeleton) \
+  CUSTOM_VARIANTS(sizex0_skeleton, (sizex0_skeleton | 0x40000000))
+
+// Used for instructions which have a wide variant with the sf and n bits set to 1.
+#define SF_N_VARIANTS(sf0_n0_skeleton) \
+  CUSTOM_VARIANTS(sf0_n0_skeleton, (sf0_n0_skeleton | 0x80400000))
+
+// Used for FP instructions which have single and double precision variants, with the type bits set
+// to either 00 or 01.
+#define FLOAT_VARIANTS(type00_skeleton) \
+  CUSTOM_VARIANTS(type00_skeleton, (type00_skeleton | 0x00400000))
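+// Illustrative: SF_VARIANTS(0x11000000) supplies the pair (0x11000000, 0x91000000), i.e. the
+// 32-bit and 64-bit "add (immediate)" skeletons used for kA64Add4RRdT below.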
+
+/*
+ * opcode: ArmOpcode enum
+ * variants: instruction skeletons supplied via CUSTOM_VARIANTS or derived macros.
+ * a{n}k: key to applying argument {n}    \
+ * a{n}s: argument {n} start bit position | n = 0, 1, 2, 3
+ * a{n}e: argument {n} end bit position   /
+ * flags: instruction attributes (used in optimization)
+ * name: mnemonic name
+ * fmt: for pretty-printing
+ * fixup: used for second-pass fixes (e.g. address fixups in branch instructions).
+ */
+#define ENCODING_MAP(opcode, variants, a0k, a0s, a0e, a1k, a1s, a1e, a2k, a2s, a2e, \
+                     a3k, a3s, a3e, flags, name, fmt, fixup) \
+        {variants, {{a0k, a0s, a0e}, {a1k, a1s, a1e}, {a2k, a2s, a2e}, \
+                    {a3k, a3s, a3e}}, opcode, flags, name, fmt, 4, fixup}
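+// Note: the trailing 4 is the size field; every A64 instruction is 4 bytes wide.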
+
+/* Instruction dump string format keys: !pf, where "!" is the start
+ * of the key, "p" is which numeric operand to use and "f" is the
+ * print format.
+ *
+ * [p]ositions:
+ *     0 -> operands[0] (dest)
+ *     1 -> operands[1] (src1)
+ *     2 -> operands[2] (src2)
+ *     3 -> operands[3] (extra)
+ *
+ * [f]ormats:
+ *     d -> decimal
+ *     D -> decimal*4 or decimal*8 depending on the instruction width
+ *     E -> decimal*4
+ *     F -> decimal*2
+ *     G -> ", lsl #2" or ", lsl #3" depending on the instruction width
+ *     c -> branch condition (eq, ne, etc.)
+ *     t -> pc-relative target
+ *     p -> pc-relative address
+ *     s -> single precision floating point register
+ *     S -> double precision floating point register
+ *     f -> single or double precision register (depending on instruction width)
+ *     I -> 8-bit immediate floating point number
+ *     l -> logical immediate
+ *     M -> 16-bit shift expression ("" or ", lsl #16" or ", lsl #32"...)
+ *     B -> dmb option string (sy, st, ish, ishst, nsh, nshst)
+ *     H -> operand shift
+ *     T -> register shift (either ", lsl #0" or ", lsl #12")
+ *     e -> register extend (e.g. uxtb #1)
+ *     o -> register shift (e.g. lsl #1) for Word registers
+ *     w -> word (32-bit) register wn, or wzr
+ *     W -> word (32-bit) register wn, or wsp
+ *     x -> extended (64-bit) register xn, or xzr
+ *     X -> extended (64-bit) register xn, or sp
+ *     r -> register with same width as instruction, r31 -> wzr, xzr
+ *     R -> register with same width as instruction, r31 -> wsp, sp
+ *
+ *  [!] escape.  To insert "!", use "!!"
+ */
+/* NOTE: must be kept in sync with enum ArmOpcode from arm64_lir.h */
+const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = {
+    ENCODING_MAP(WIDE(kA64Adc3rrr), SF_VARIANTS(0x1a000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "adc", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Add4RRdT), SF_VARIANTS(0x11000000),
+                 kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtBitBlt, 23, 22, IS_QUAD_OP | REG_DEF0_USE1,
+                 "add", "!0R, !1R, #!2d!3T", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Add4rrro), SF_VARIANTS(0x0b000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE1,
+                 "add", "!0r, !1r, !2r!3o", kFixupNone),
+    // Note: adr is binary, but declared as tertiary. The third argument is used while doing the
+    //   fixups and contains information to identify the adr label.
+    ENCODING_MAP(kA64Adr2xd, NO_VARIANTS(0x10000000),
+                 kFmtRegX, 4, 0, kFmtImm21, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP,
+                 "adr", "!0x, #!1d", kFixupAdr),
+    ENCODING_MAP(WIDE(kA64And3Rrl), SF_VARIANTS(0x12000000),
+                 kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "and", "!0R, !1r, #!2l", kFixupNone),
+    ENCODING_MAP(WIDE(kA64And4rrro), SF_VARIANTS(0x0a000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "and", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Asr3rrd), CUSTOM_VARIANTS(0x13007c00, 0x9340fc00),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "asr", "!0r, !1r, #!2d", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Asr3rrr), SF_VARIANTS(0x1ac02800),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "asr", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(kA64B2ct, NO_VARIANTS(0x54000000),
+                 kFmtBitBlt, 3, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES |
+                 NEEDS_FIXUP, "b.!0c", "!1t", kFixupCondBranch),
+    ENCODING_MAP(kA64Blr1x, NO_VARIANTS(0xd63f0000),
+                 kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_USE0 | IS_BRANCH | REG_DEF_LR,
+                 "blr", "!0x", kFixupNone),
+    ENCODING_MAP(kA64Br1x, NO_VARIANTS(0xd61f0000),
+                 kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | IS_BRANCH,
+                 "br", "!0x", kFixupNone),
+    ENCODING_MAP(kA64Brk1d, NO_VARIANTS(0xd4200000),
+                 kFmtBitBlt, 20, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
+                 "brk", "!0d", kFixupNone),
+    ENCODING_MAP(kA64B1t, NO_VARIANTS(0x14000000),
+                 kFmtBitBlt, 25, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP,
+                 "b", "!0t", kFixupT1Branch),
+    ENCODING_MAP(WIDE(kA64Cbnz2rt), SF_VARIANTS(0x35000000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP,
+                 "cbnz", "!0r, !1t", kFixupCBxZ),
+    ENCODING_MAP(WIDE(kA64Cbz2rt), SF_VARIANTS(0x34000000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_USE0 | IS_BRANCH  | NEEDS_FIXUP,
+                 "cbz", "!0r, !1t", kFixupCBxZ),
+    ENCODING_MAP(WIDE(kA64Cmn3rro), SF_VARIANTS(0x2b00001f),
+                 kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmn", "!0r, !1r!2o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Cmn3Rre), SF_VARIANTS(0x2b20001f),
+                 kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtExtend, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmn", "!0R, !1r!2e", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Cmn3RdT), SF_VARIANTS(0x3100001f),
+                 kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
+                 "cmn", "!0R, #!1d!2T", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Cmp3rro), SF_VARIANTS(0x6b00001f),
+                 kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmp", "!0r, !1r!2o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Cmp3Rre), SF_VARIANTS(0x6b20001f),
+                 kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtExtend, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmp", "!0R, !1r!2e", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Cmp3RdT), SF_VARIANTS(0x7100001f),
+                 kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
+                 "cmp", "!0R, #!1d!2T", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Csel4rrrc), SF_VARIANTS(0x1a800000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
+                 "csel", "!0r, !1r, !2r, !3c", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Csinc4rrrc), SF_VARIANTS(0x1a800400),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
+                 "csinc", "!0r, !1r, !2r, !3c", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Csneg4rrrc), SF_VARIANTS(0x5a800400),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
+                 "csneg", "!0r, !1r, !2r, !3c", kFixupNone),
+    ENCODING_MAP(kA64Dmb1B, NO_VARIANTS(0xd50330bf),
+                 kFmtBitBlt, 11, 8, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP,
+                 "dmb", "#!0B", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Eor3Rrl), SF_VARIANTS(0x52000000),
+                 kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "eor", "!0R, !1r, #!2l", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Eor4rrro), SF_VARIANTS(0x4a000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "eor", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Extr4rrrd), SF_N_VARIANTS(0x13800000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE12,
+                 "extr", "!0r, !1r, !2r, #!3d", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fabs2ff), FLOAT_VARIANTS(0x1e20c000),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP| REG_DEF0_USE1,
+                 "fabs", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fadd3fff), FLOAT_VARIANTS(0x1e202800),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fadd", "!0f, !1f, !2f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fcmp1f), FLOAT_VARIANTS(0x1e202008),
+                 kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | SETS_CCODES,
+                 "fcmp", "!0f, #0", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fcmp2ff), FLOAT_VARIANTS(0x1e202000),
+                 kFmtRegF, 9, 5, kFmtRegF, 20, 16, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
+                 "fcmp", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fcvtzs2wf), FLOAT_VARIANTS(0x1e380000),
+                 kFmtRegW, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fcvtzs", "!0w, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fcvtzs2xf), FLOAT_VARIANTS(0x9e380000),
+                 kFmtRegX, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fcvtzs", "!0x, !1f", kFixupNone),
+    ENCODING_MAP(kA64Fcvt2Ss, NO_VARIANTS(0x1e22C000),
+                 kFmtRegD, 4, 0, kFmtRegS, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fcvt", "!0S, !1s", kFixupNone),
+    ENCODING_MAP(kA64Fcvt2sS, NO_VARIANTS(0x1e624000),
+                 kFmtRegS, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fcvt", "!0s, !1S", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fdiv3fff), FLOAT_VARIANTS(0x1e201800),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fdiv", "!0f, !1f, !2f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fmov2ff), FLOAT_VARIANTS(0x1e204000),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmov", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fmov2fI), FLOAT_VARIANTS(0x1e201000),
+                 kFmtRegF, 4, 0, kFmtBitBlt, 20, 13, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 "fmov", "!0f, #!1I", kFixupNone),
+    ENCODING_MAP(kA64Fmov2sw, NO_VARIANTS(0x1e270000),
+                 kFmtRegS, 4, 0, kFmtRegW, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmov", "!0s, !1w", kFixupNone),
+    ENCODING_MAP(kA64Fmov2Sx, NO_VARIANTS(0x9e6f0000),
+                 kFmtRegD, 4, 0, kFmtRegX, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmov", "!0S, !1x", kFixupNone),
+    ENCODING_MAP(kA64Fmov2ws, NO_VARIANTS(0x1e260000),
+                 kFmtRegW, 4, 0, kFmtRegS, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmov", "!0w, !1s", kFixupNone),
+    ENCODING_MAP(kA64Fmov2xS, NO_VARIANTS(0x9e6e0000),
+                 kFmtRegX, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmov", "!0x, !1S", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fmul3fff), FLOAT_VARIANTS(0x1e200800),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fmul", "!0f, !1f, !2f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fneg2ff), FLOAT_VARIANTS(0x1e214000),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fneg", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Frintz2ff), FLOAT_VARIANTS(0x1e25c000),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "frintz", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fsqrt2ff), FLOAT_VARIANTS(0x1e61c000),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fsqrt", "!0f, !1f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fsub3fff), FLOAT_VARIANTS(0x1e203800),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fsub", "!0f, !1f, !2f", kFixupNone),
+    ENCODING_MAP(kA64Ldrb3wXd, NO_VARIANTS(0x39400000),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrb", "!0w, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(kA64Ldrb3wXx, NO_VARIANTS(0x38606800),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrb", "!0w, [!1X, !2x]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldrsb3rXd), CUSTOM_VARIANTS(0x39c00000, 0x39800000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrsb", "!0r, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldrsb3rXx), CUSTOM_VARIANTS(0x38e06800, 0x38a06800),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrsb", "!0r, [!1X, !2x]", kFixupNone),
+    ENCODING_MAP(kA64Ldrh3wXF, NO_VARIANTS(0x79400000),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrh", "!0w, [!1X, #!2F]", kFixupNone),
+    ENCODING_MAP(kA64Ldrh4wXxd, NO_VARIANTS(0x78606800),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrh", "!0w, [!1X, !2x, lsl #!3d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldrsh3rXF), CUSTOM_VARIANTS(0x79c00000, 0x79800000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrsh", "!0r, [!1X, #!2F]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldrsh4rXxd), CUSTOM_VARIANTS(0x78e06800, 0x78906800),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrsh", "!0r, [!1X, !2x, lsl #!3d]", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Ldr2fp), SIZE_VARIANTS(0x1c000000),
+                 kFmtRegF, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
+                 "ldr", "!0f, !1p", kFixupLoad),
+    ENCODING_MAP(WIDE(kA64Ldr2rp), SIZE_VARIANTS(0x18000000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
+                 "ldr", "!0r, !1p", kFixupLoad),
+    ENCODING_MAP(FWIDE(kA64Ldr3fXD), SIZE_VARIANTS(0xbd400000),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldr", "!0f, [!1X, #!2D]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldr3rXD), SIZE_VARIANTS(0xb9400000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldr", "!0r, [!1X, #!2D]", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Ldr4fXxG), SIZE_VARIANTS(0xbc606800),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldr", "!0f, [!1X, !2x!3G]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldr4rXxG), SIZE_VARIANTS(0xb8606800),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldr", "!0r, [!1X, !2x!3G]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64LdrPost3rXd), SIZE_VARIANTS(0xb8400400),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01 | REG_USE1 | IS_LOAD,
+                 "ldr", "!0r, [!1X], #!2d", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldp4ffXD), CUSTOM_VARIANTS(0x2d400000, 0x6d400000),
+                 kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD,
+                 "ldp", "!0f, !1f, [!2X, #!3D]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldp4rrXD), SF_VARIANTS(0x29400000),
+                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD,
+                 "ldp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64LdpPost4rrXD), CUSTOM_VARIANTS(0x28c00000, 0xa8c00000),
+                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF012 | IS_LOAD,
+                 "ldp", "!0r, !1r, [!2X], #!3D", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Ldur3fXd), CUSTOM_VARIANTS(0xbc400000, 0xfc400000),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldur", "!0f, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldur3rXd), SIZE_VARIANTS(0xb8400000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldur", "!0r, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ldxr2rX), SIZE_VARIANTS(0x885f7c00),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldxr", "!0r, [!1X]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Lsl3rrr), SF_VARIANTS(0x1ac02000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "lsl", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Lsr3rrd), CUSTOM_VARIANTS(0x53007c00, 0xd340fc00),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "lsr", "!0r, !1r, #!2d", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Lsr3rrr), SF_VARIANTS(0x1ac02400),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "lsr", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Movk3rdM), SF_VARIANTS(0x72800000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE0,
+                 "movk", "!0r, #!1d!2M", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Movn3rdM), SF_VARIANTS(0x12800000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,
+                 "movn", "!0r, #!1d!2M", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Movz3rdM), SF_VARIANTS(0x52800000),
+                 kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,
+                 "movz", "!0r, #!1d!2M", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Mov2rr), SF_VARIANTS(0x2a0003e0),
+                 kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "mov", "!0r, !1r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Mvn2rr), SF_VARIANTS(0x2a2003e0),
+                 kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "mvn", "!0r, !1r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Mul3rrr), SF_VARIANTS(0x1b007c00),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "mul", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Msub4rrrr), SF_VARIANTS(0x1b008000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 14, 10,
+                 kFmtRegR, 20, 16, IS_QUAD_OP | REG_DEF0_USE123,
+                 "msub", "!0r, !1r, !3r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Neg3rro), SF_VARIANTS(0x4b0003e0),
+                 kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "neg", "!0r, !1r!2o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Orr3Rrl), SF_VARIANTS(0x32000000),
+                 kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "orr", "!0R, !1r, #!2l", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Orr4rrro), SF_VARIANTS(0x2a000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "orr", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(kA64Ret, NO_VARIANTS(0xd65f03c0),
+                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH,
+                 "ret", "", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Rev2rr), CUSTOM_VARIANTS(0x5ac00800, 0xdac00c00),
+                 kFmtRegR, 11, 8, kFmtRegR, 19, 16, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "rev", "!0r, !1r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Rev162rr), SF_VARIANTS(0xfa90f0b0),
+                 kFmtRegR, 11, 8, kFmtRegR, 19, 16, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "rev16", "!0r, !1r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ror3rrr), SF_VARIANTS(0x1ac02c00),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "ror", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sbc3rrr), SF_VARIANTS(0x5a000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "sbc", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sbfm4rrdd), SF_N_VARIANTS(0x13000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
+                 kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE1,
+                 "sbfm", "!0r, !1r, #!2d, #!3d", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Scvtf2fw), FLOAT_VARIANTS(0x1e220000),
+                 kFmtRegF, 4, 0, kFmtRegW, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "scvtf", "!0f, !1w", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Scvtf2fx), FLOAT_VARIANTS(0x9e220000),
+                 kFmtRegF, 4, 0, kFmtRegX, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "scvtf", "!0f, !1x", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sdiv3rrr), SF_VARIANTS(0x1ac00c00),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "sdiv", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Smaddl4xwwx), NO_VARIANTS(0x9b200000),
+                 kFmtRegX, 4, 0, kFmtRegW, 9, 5, kFmtRegW, 20, 16,
+                 kFmtRegX, -1, -1, IS_QUAD_OP | REG_DEF0_USE123,
+                 "smaddl", "!0x, !1w, !2w, !3x", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Stp4ffXD), CUSTOM_VARIANTS(0x2d000000, 0x6d000000),
+                 kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "stp", "!0f, !1f, [!2X, #!3D]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Stp4rrXD), SF_VARIANTS(0x29000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "stp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64StpPost4rrXD), CUSTOM_VARIANTS(0x28800000, 0xa8800000),
+                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE,
+                 "stp", "!0r, !1r, [!2X], #!3D", kFixupNone),
+    ENCODING_MAP(WIDE(kA64StpPre4rrXD), CUSTOM_VARIANTS(0x29800000, 0xa9800000),
+                 kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+                 kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE,
+                 "stp", "!0r, !1r, [!2X, #!3D]!!", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Str3fXD), CUSTOM_VARIANTS(0xbd000000, 0xfd000000),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "str", "!0f, [!1X, #!2D]", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Str4fXxG), CUSTOM_VARIANTS(0xbc206800, 0xfc206800),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "str", "!0f, [!1X, !2x!3G]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Str3rXD), SIZE_VARIANTS(0xb9000000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "str", "!0r, [!1X, #!2D]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Str4rXxG), SIZE_VARIANTS(0xb8206800),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "str", "!0r, [!1X, !2x!3G]", kFixupNone),
+    ENCODING_MAP(kA64Strb3wXd, NO_VARIANTS(0x39000000),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "strb", "!0w, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(kA64Strb3wXx, NO_VARIANTS(0x38206800),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
+                 "strb", "!0w, [!1X, !2x]", kFixupNone),
+    ENCODING_MAP(kA64Strh3wXF, NO_VARIANTS(0x79000000),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "strh", "!0w, [!1X, #!2F]", kFixupNone),
+    ENCODING_MAP(kA64Strh4wXxd, NO_VARIANTS(0x78206800),
+                 kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+                 kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "strh", "!0w, [!1X, !2x, lsl #!3d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64StrPost3rXd), SIZE_VARIANTS(0xb8000400),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | REG_DEF1 | IS_STORE,
+                 "str", "!0r, [!1X], #!2d", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Stur3fXd), CUSTOM_VARIANTS(0xbc000000, 0xfc000000),
+                 kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "stur", "!0f, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Stur3rXd), SIZE_VARIANTS(0xb8000000),
+                 kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "stur", "!0r, [!1X, #!2d]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Stxr3wrX), SIZE_VARIANTS(0x88007c00),
+                 kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STORE,
+                 "stxr", "!0w, !1r, [!2X]", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sub4RRdT), SF_VARIANTS(0x51000000),
+                 kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtBitBlt, 23, 22, IS_QUAD_OP | REG_DEF0_USE1,
+                 "sub", "!0R, !1R, #!2d!3T", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sub4rrro), SF_VARIANTS(0x4b000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "sub", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Subs3rRd), SF_VARIANTS(0x71000000),
+                 kFmtRegR, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "subs", "!0r, !1R, #!2d", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Tst3rro), SF_VARIANTS(0x6a000000),
+                 kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1, IS_QUAD_OP | REG_USE01 | SETS_CCODES,
+                 "tst", "!0r, !1r!2o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Ubfm4rrdd), SF_N_VARIANTS(0x53000000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
+                 kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE1,
+                 "ubfm", "!0r, !1r, !2d, !3d", kFixupNone),
+};
+
+// new_lir replaces orig_lir in the pcrel_fixup list.
+void Arm64Mir2Lir::ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) {
+  new_lir->u.a.pcrel_next = orig_lir->u.a.pcrel_next;
+  if (UNLIKELY(prev_lir == NULL)) {
+    first_fixup_ = new_lir;
+  } else {
+    prev_lir->u.a.pcrel_next = new_lir;
+  }
+  orig_lir->flags.fixup = kFixupNone;
+}
+
+// new_lir is inserted before orig_lir in the pcrel_fixup list.
+void Arm64Mir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) {
+  new_lir->u.a.pcrel_next = orig_lir;
+  if (UNLIKELY(prev_lir == NULL)) {
+    first_fixup_ = new_lir;
+  } else {
+    DCHECK(prev_lir->u.a.pcrel_next == orig_lir);
+    prev_lir->u.a.pcrel_next = new_lir;
+  }
+}
+
+/* Nop, used for aligning code. Nop is an alias for hint #0. */
+#define PADDING_NOP (UINT32_C(0xd503201f))
+
+uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) {
+  for (; lir != nullptr; lir = NEXT_LIR(lir)) {
+    bool opcode_is_wide = IS_WIDE(lir->opcode);
+    ArmOpcode opcode = UNWIDE(lir->opcode);
+
+    if (UNLIKELY(IsPseudoLirOp(opcode))) {
+      continue;
+    }
+
+    if (LIKELY(!lir->flags.is_nop)) {
+      const ArmEncodingMap *encoder = &EncodingMap[opcode];
+
+      // Select the right variant of the skeleton.
+      uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton;
+      DCHECK(!opcode_is_wide || IS_WIDE(encoder->opcode));
+
+      for (int i = 0; i < 4; i++) {
+        ArmEncodingKind kind = encoder->field_loc[i].kind;
+        uint32_t operand = lir->operands[i];
+        uint32_t value;
+
+        if (LIKELY(static_cast<unsigned>(kind) <= kFmtBitBlt)) {
+          // Note: this will handle kFmtReg* and kFmtBitBlt.
+
+          if (static_cast<unsigned>(kind) < kFmtBitBlt) {
+            bool is_zero = A64_REG_IS_ZR(operand);
+
+            if (kIsDebugBuild) {
+              // Register usage checks: First establish register usage requirements based on the
+              // format in `kind'.
+              bool want_float = false;
+              bool want_64_bit = false;
+              bool want_size_match = false;
+              bool want_zero = false;
+              switch (kind) {
+                case kFmtRegX:
+                  want_64_bit = true;
+                  // Intentional fall-through.
+                case kFmtRegW:
+                  want_size_match = true;
+                  // Intentional fall-through.
+                case kFmtRegR:
+                  want_zero = true;
+                  break;
+                case kFmtRegXOrSp:
+                  want_64_bit = true;
+                  // Intentional fall-through.
+                case kFmtRegWOrSp:
+                  want_size_match = true;
+                  break;
+                case kFmtRegROrSp:
+                  break;
+                case kFmtRegD:
+                  want_64_bit = true;
+                  // Intentional fall-through.
+                case kFmtRegS:
+                  want_size_match = true;
+                  // Intentional fall-through.
+                case kFmtRegF:
+                  want_float = true;
+                  break;
+                default:
+                  LOG(FATAL) << "Bad fmt for arg n. " << i << " of " << encoder->name
+                             << " (" << kind << ")";
+                  break;
+              }
+
+              // Now check that the requirements are satisfied.
+              RegStorage reg(operand | RegStorage::kValid);
+              const char *expected = nullptr;
+              if (want_float) {
+                if (!reg.IsFloat()) {
+                  expected = "float register";
+                } else if (want_size_match && (reg.IsDouble() != want_64_bit)) {
+                  expected = (want_64_bit) ? "double register" : "single register";
+                }
+              } else {
+                if (reg.IsFloat()) {
+                  expected = "core register";
+                } else if (want_size_match && (reg.Is64Bit() != want_64_bit)) {
+                  expected = (want_64_bit) ? "x-register" : "w-register";
+                } else if (reg.GetRegNum() == 31 && is_zero != want_zero) {
+                  expected = (want_zero) ? "zero-register" : "sp-register";
+                }
+              }
+
+              // TODO(Arm64): if !want_size_match, we should still compare the size of the
+              //   register with the size required by the instruction width (kA64Wide).
+
+              // Fail if `expected' contains an unsatisfied requirement.
+              if (expected != nullptr) {
+                // TODO(Arm64): make this FATAL.
+                LOG(WARNING) << "Bad argument n. " << i << " of " << encoder->name
+                             << ". Expected " << expected << ", got 0x" << std::hex << operand;
+              }
+            }
+
+            // TODO(Arm64): this may or may not be necessary, depending on how wzr, xzr are
+            //   defined.
+            if (is_zero) {
+              operand = 31;
+            }
+          }
+
+          value = (operand << encoder->field_loc[i].start) &
+              ((1 << (encoder->field_loc[i].end + 1)) - 1);
+          bits |= value;
+        } else {
+          switch (kind) {
+            case kFmtSkip:
+              break;  // Nothing to do, but continue to next.
+            case kFmtUnused:
+              i = 4;  // Done, break out of the enclosing loop.
+              break;
+            case kFmtShift:
+              // Intentional fallthrough.
+            case kFmtExtend:
+              DCHECK_EQ((operand & (1 << 6)) == 0, kind == kFmtShift);
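+              // Spread the operand's low six bits (the shift/extend amount) into instruction
+              // bits [15:10] and its bits [8:6] into bits [23:21]; bit 6 distinguishes shift
+              // from extend, per the DCHECK above.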
+              value = (operand & 0x3f) << 10;
+              value |= ((operand & 0x1c0) >> 6) << 21;
+              bits |= value;
+              break;
+            case kFmtImm21:
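+              // ADR-style 21-bit immediate: the low two bits form immlo (instruction
+              // bits [30:29]), the remaining 19 bits form immhi (bits [23:5]).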
+              value = (operand & 0x3) << 29;
+              value |= ((operand & 0x1ffffc) >> 2) << 5;
+              bits |= value;
+              break;
+            default:
+              LOG(FATAL) << "Bad fmt for arg. " << i << " in " << encoder->name
+                         << " (" << kind << ")";
+          }
+        }
+      }
+
+      DCHECK_EQ(encoder->size, 4);
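+      // Emit the 32-bit instruction in little-endian byte order.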
+      write_pos[0] = (bits & 0xff);
+      write_pos[1] = ((bits >> 8) & 0xff);
+      write_pos[2] = ((bits >> 16) & 0xff);
+      write_pos[3] = ((bits >> 24) & 0xff);
+      write_pos += 4;
+    }
+  }
+
+  return write_pos;
+}
+
+// Align the data offset on an 8-byte boundary: the data section will only contain double-word
+// items, as word immediates are better materialized directly in code (they require no more than
+// 2 instructions).
+#define ALIGNED_DATA_OFFSET(offset) (((offset) + 0x7) & ~0x7)
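+// For example, offsets 1 through 8 all round up to 8, and 9 through 16 round up to 16.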
+
+// Assemble the LIR into binary instruction format.
+void Arm64Mir2Lir::AssembleLIR() {
+  LIR* lir;
+  LIR* prev_lir;
+  cu_->NewTimingSplit("Assemble");
+  int assembler_retries = 0;
+  CodeOffset starting_offset = LinkFixupInsns(first_lir_insn_, last_lir_insn_, 0);
+  data_offset_ = ALIGNED_DATA_OFFSET(starting_offset);
+  int32_t offset_adjustment;
+  AssignDataOffsets();
+
+  /*
+   * Note: generation must be 1 on first pass (to distinguish from initialized state of 0
+   * for non-visited nodes). Start at zero here; the bit is flipped to 1 on entry to the loop.
+   */
+  int generation = 0;
+  while (true) {
+    // TODO(Arm64): check whether passes and offset adjustments are really necessary.
+    //   Currently they aren't, as LIRs are never inserted in the fixups below.
+    //   Things can be different if jump ranges above 1 MB need to be supported.
+    //   If they are not, then we can get rid of the assembler retry logic.
+
+    offset_adjustment = 0;
+    AssemblerStatus res = kSuccess;  // Assume success
+    generation ^= 1;
+    // Note: nodes requiring possible fixup are linked in ascending order.
+    lir = first_fixup_;
+    prev_lir = NULL;
+    while (lir != NULL) {
+      /*
+       * NOTE: the lir being considered here will be encoded following the switch (so long as
+       * we're not in a retry situation).  However, any new non-pc_rel instructions inserted
+       * due to retry must be explicitly encoded at the time of insertion.  Note that
+       * inserted instructions don't need use/def flags, but do need size and pc-rel status
+       * properly updated.
+       */
+      lir->offset += offset_adjustment;
+      // During pass, allows us to tell whether a node has been updated with offset_adjustment yet.
+      lir->flags.generation = generation;
+      switch (static_cast<FixupKind>(lir->flags.fixup)) {
+        case kFixupLabel:
+        case kFixupNone:
+        case kFixupVLoad:
+          break;
+        case kFixupT1Branch: {
+          LIR *target_lir = lir->target;
+          DCHECK(target_lir);
+          CodeOffset pc = lir->offset;
+          CodeOffset target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int32_t delta = target - pc;
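+          // Branch targets are encoded as a signed 19-bit word offset, so the byte delta
+          // must be 4-byte aligned and within roughly +/-1 MiB of the branch.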
+          if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) {
+            LOG(FATAL) << "Invalid jump range in kFixupT1Branch";
+          }
+          lir->operands[0] = delta >> 2;
+          break;
+        }
+        case kFixupLoad:
+        case kFixupCBxZ:
+        case kFixupCondBranch: {
+          LIR *target_lir = lir->target;
+          DCHECK(target_lir);
+          CodeOffset pc = lir->offset;
+          CodeOffset target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int32_t delta = target - pc;
+          if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) {
+            LOG(FATAL) << "Invalid jump range in kFixupLoad";
+          }
+          lir->operands[1] = delta >> 2;
+          break;
+        }
+        case kFixupAdr: {
+          LIR* target_lir = lir->target;
+          int32_t delta;
+          if (target_lir) {
+            CodeOffset target_offs = ((target_lir->flags.generation == lir->flags.generation) ?
+                                      0 : offset_adjustment) + target_lir->offset;
+            delta = target_offs - lir->offset;
+          } else if (lir->operands[2] >= 0) {
+            EmbeddedData* tab = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2]));
+            delta = tab->offset + offset_adjustment - lir->offset;
+          } else {
+            // No fixup: this usage allows retrieving the current PC.
+            delta = lir->operands[1];
+          }
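+          // adr encodes a signed 21-bit byte offset, i.e. roughly +/-1 MiB from the PC.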
+          if (!IS_SIGNED_IMM21(delta)) {
+            LOG(FATAL) << "Jump range above 1MB in kFixupAdr";
+          }
+          lir->operands[1] = delta;
+          break;
+        }
+        default:
+          LOG(FATAL) << "Unexpected case " << lir->flags.fixup;
+      }
+      prev_lir = lir;
+      lir = lir->u.a.pcrel_next;
+    }
+
+    if (res == kSuccess) {
+      break;
+    } else {
+      assembler_retries++;
+      if (assembler_retries > MAX_ASSEMBLER_RETRIES) {
+        CodegenDump();
+        LOG(FATAL) << "Assembler error - too many retries";
+      }
+      starting_offset += offset_adjustment;
+      data_offset_ = ALIGNED_DATA_OFFSET(starting_offset);
+      AssignDataOffsets();
+    }
+  }
+
+  // Build the CodeBuffer.
+  DCHECK_LE(data_offset_, total_size_);
+  code_buffer_.reserve(total_size_);
+  code_buffer_.resize(starting_offset);
+  uint8_t* write_pos = &code_buffer_[0];
+  write_pos = EncodeLIRs(write_pos, first_lir_insn_);
+  DCHECK_EQ(static_cast<CodeOffset>(write_pos - &code_buffer_[0]), starting_offset);
+
+  DCHECK_EQ(data_offset_, ALIGNED_DATA_OFFSET(code_buffer_.size()));
+
+  // Install literals
+  InstallLiteralPools();
+
+  // Install switch tables
+  InstallSwitchTables();
+
+  // Install fill array data
+  InstallFillArrayData();
+
+  // Create the mapping table and native offset to reference map.
+  cu_->NewTimingSplit("PcMappingTable");
+  CreateMappingTables();
+
+  cu_->NewTimingSplit("GcMap");
+  CreateNativeGcMap();
+}
+
+int Arm64Mir2Lir::GetInsnSize(LIR* lir) {
+  ArmOpcode opcode = UNWIDE(lir->opcode);
+  DCHECK(!IsPseudoLirOp(opcode));
+  return EncodingMap[opcode].size;
+}
+
+// Assign offsets to the instructions and link those needing fixup into the pcrel fixup chain.
+uint32_t Arm64Mir2Lir::LinkFixupInsns(LIR* head_lir, LIR* tail_lir, uint32_t offset) {
+  LIR* end_lir = tail_lir->next;
+
+  LIR* last_fixup = NULL;
+  for (LIR* lir = head_lir; lir != end_lir; lir = NEXT_LIR(lir)) {
+    ArmOpcode opcode = UNWIDE(lir->opcode);
+    if (!lir->flags.is_nop) {
+      if (lir->flags.fixup != kFixupNone) {
+        if (!IsPseudoLirOp(opcode)) {
+          lir->flags.size = EncodingMap[opcode].size;
+          lir->flags.fixup = EncodingMap[opcode].fixup;
+        } else {
+          DCHECK_NE(static_cast<int>(opcode), kPseudoPseudoAlign4);
+          lir->flags.size = 0;
+          lir->flags.fixup = kFixupLabel;
+        }
+        // Link into the fixup chain.
+        lir->flags.use_def_invalid = true;
+        lir->u.a.pcrel_next = NULL;
+        if (first_fixup_ == NULL) {
+          first_fixup_ = lir;
+        } else {
+          last_fixup->u.a.pcrel_next = lir;
+        }
+        last_fixup = lir;
+        lir->offset = offset;
+      }
+      offset += lir->flags.size;
+    }
+  }
+  return offset;
+}
+
+void Arm64Mir2Lir::AssignDataOffsets() {
+  /* Set up offsets for literals */
+  CodeOffset offset = data_offset_;
+
+  offset = AssignLiteralOffset(offset);
+
+  offset = AssignSwitchTablesOffset(offset);
+
+  total_size_ = AssignFillArrayDataOffset(offset);
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
new file mode 100644
index 0000000..2e3ef86
--- /dev/null
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -0,0 +1,400 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This file contains codegen for the Arm64 ISA. */
+
+#include "arm64_lir.h"
+#include "codegen_arm64.h"
+#include "dex/quick/mir_to_lir-inl.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+
+namespace art {
+
+bool Arm64Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir,
+                                  const InlineMethod& special) {
+  // TODO(Arm64): re-enable this, once hard-float ABI is implemented.
+  //   (this currently does not work, as GetArgMappingToPhysicalReg returns InvalidReg()).
+  // return Mir2Lir::GenSpecialCase(bb, mir, special);
+  return false;
+}
+
+/*
+ * The sparse table in the literal pool is an array of <key,displacement>
+ * pairs.  For each entry, we load key and displacement together using ldp.
+ * The test loop will look something like:
+ *
+ *   adr   r_base, <table>
+ *   ldr   r_val, [rA64_SP, v_reg_off]
+ *   mov   r_idx, #table_size
+ * loop:
+ *   cbz   r_idx, quit
+ *   ldp   r_key, r_disp, [r_base], #8
+ *   sub   r_idx, #1
+ *   cmp   r_val, r_key
+ *   b.ne  loop
+ *   adr   r_base, #0        ; This is the instruction from which we compute displacements
+ *   add   r_base, r_disp
+ *   br    r_base
+ * quit:
+ */
+void Arm64Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
+                                   RegLocation rl_src) {
+  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
+  if (cu_->verbose) {
+    DumpSparseSwitchTable(table);
+  }
+  // Add the table to the list - we'll process it later
+  SwitchTable *tab_rec =
+      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
+  tab_rec->table = table;
+  tab_rec->vaddr = current_dalvik_offset_;
+  uint32_t size = table[1];
+  tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
+  switch_tables_.Insert(tab_rec);
+
+  // Get the switch value
+  rl_src = LoadValue(rl_src, kCoreReg);
+  RegStorage r_base = AllocTemp();
+  // Allocate key and disp temps.
+  RegStorage r_key = AllocTemp();
+  RegStorage r_disp = AllocTemp();
+  // Materialize a pointer to the switch table
+  NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, WrapPointer(tab_rec));
+  // Set up r_idx
+  RegStorage r_idx = AllocTemp();
+  LoadConstant(r_idx, size);
+
+  // Entry of loop.
+  LIR* loop_entry = NewLIR0(kPseudoTargetLabel);
+  LIR* branch_out = NewLIR2(kA64Cbz2rt, r_idx.GetReg(), 0);
+
+  // Load next key/disp.
+  NewLIR4(kA64LdpPost4rrXD, r_key.GetReg(), r_disp.GetReg(), r_base.GetReg(), 2);
+  OpRegRegImm(kOpSub, r_idx, r_idx, 1);
+
+  // Go to next case, if key does not match.
+  OpRegReg(kOpCmp, r_key, rl_src.reg);
+  OpCondBranch(kCondNe, loop_entry);
+
+  // Key does match: branch to case label.
+  LIR* switch_label = NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, -1);
+  tab_rec->anchor = switch_label;
+
+  // Add displacement to base branch address and go!
+  OpRegRegRegShift(kOpAdd, r_base, r_base, r_disp, ENCODE_NO_SHIFT);
+  NewLIR1(kA64Br1x, r_base.GetReg());
+
+  // Loop exit label.
+  LIR* loop_exit = NewLIR0(kPseudoTargetLabel);
+  branch_out->target = loop_exit;
+}
+
+
+void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
+                                 RegLocation rl_src) {
+  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
+  if (cu_->verbose) {
+    DumpPackedSwitchTable(table);
+  }
+  // Add the table to the list - we'll process it later
+  SwitchTable *tab_rec =
+      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable),  kArenaAllocData));
+  tab_rec->table = table;
+  tab_rec->vaddr = current_dalvik_offset_;
+  uint32_t size = table[1];
+  tab_rec->targets =
+      static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
+  switch_tables_.Insert(tab_rec);
+
+  // Get the switch value
+  rl_src = LoadValue(rl_src, kCoreReg);
+  RegStorage table_base = AllocTemp();
+  // Materialize a pointer to the switch table
+  NewLIR3(kA64Adr2xd, table_base.GetReg(), 0, WrapPointer(tab_rec));
+  int low_key = s4FromSwitchData(&table[2]);
+  RegStorage key_reg;
+  // Remove the bias, if necessary
+  if (low_key == 0) {
+    key_reg = rl_src.reg;
+  } else {
+    key_reg = AllocTemp();
+    OpRegRegImm(kOpSub, key_reg, rl_src.reg, low_key);
+  }
+  // Bounds check - if < 0 or >= size continue following switch
+  OpRegImm(kOpCmp, key_reg, size - 1);
+  LIR* branch_over = OpCondBranch(kCondHi, NULL);
+
+  // Load the displacement from the switch table
+  RegStorage disp_reg = AllocTemp();
+  LoadBaseIndexed(table_base, key_reg, disp_reg, 2, k32);
+
+  // Get base branch address.
+  RegStorage branch_reg = AllocTemp();
+  LIR* switch_label = NewLIR3(kA64Adr2xd, branch_reg.GetReg(), 0, -1);
+  tab_rec->anchor = switch_label;
+
+  // Add displacement to base branch address and go!
+  OpRegRegRegShift(kOpAdd, branch_reg, branch_reg, disp_reg, ENCODE_NO_SHIFT);
+  NewLIR1(kA64Br1x, branch_reg.GetReg());
+
+  // branch_over target here
+  LIR* target = NewLIR0(kPseudoTargetLabel);
+  branch_over->target = target;
+}
+
+/*
+ * Array data table format:
+ *  ushort ident = 0x0300   magic value
+ *  ushort width            width of each element in the table
+ *  uint   size             number of elements in the table
+ *  ubyte  data[size*width] table of data values (may contain a single-byte
+ *                          padding at the end)
+ *
+ * Total size is 4+(width * size + 1)/2 16-bit code units.
+ */
+void Arm64Mir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) {
+  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
+  // Add the table to the list - we'll process it later
+  FillArrayData *tab_rec =
+      static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), kArenaAllocData));
+  tab_rec->table = table;
+  tab_rec->vaddr = current_dalvik_offset_;
+  uint16_t width = tab_rec->table[1];
+  uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16);
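+  // The extra 8 bytes cover the payload header (ident, width and size fields).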
+  tab_rec->size = (size * width) + 8;
+
+  fill_array_data_.Insert(tab_rec);
+
+  // Making a call - use explicit registers
+  FlushAllRegs();   /* Everything to home location */
+  LoadValueDirectFixed(rl_src, rs_x0);
+  LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData).Int32Value(),
+               rs_rA64_LR);
+  // Materialize a pointer to the fill data image
+  NewLIR3(kA64Adr2xd, rx1, 0, WrapPointer(tab_rec));
+  ClobberCallerSave();
+  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
+  MarkSafepointPC(call_inst);
+}
+
+/*
+ * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
+ * details see monitor.cc.
+ */
+void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
+  // x0/w0 = object
+  // w1    = thin lock thread id
+  // x2    = address of lock word
+  // w3    = lock word / store failure
+  // TUNING: how much performance do we gain by inlining this, given that we have
+  // already flushed all registers?
+  FlushAllRegs();
+  LoadValueDirectFixed(rl_src, rs_w0);
+  LockCallTemps();  // Prepare for explicit register usage
+  LIR* null_check_branch = nullptr;
+  if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
+    null_check_branch = nullptr;  // No null check.
+  } else {
+    // If the null-check fails, it's handled by the slow path to reduce exception-related meta-data.
+    if (Runtime::Current()->ExplicitNullChecks()) {
+      null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
+    }
+  }
+  Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
+  OpRegRegImm(kOpAdd, rs_x2, rs_x0, mirror::Object::MonitorOffset().Int32Value());
+  NewLIR2(kA64Ldxr2rX, rw3, rx2);
+  MarkPossibleNullPointerException(opt_flags);
+  LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_x1, 0, NULL);
+  NewLIR3(kA64Stxr3wrX, rw3, rw1, rx2);
+  LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_x1, 0, NULL);
+
+  LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+  not_unlocked_branch->target = slow_path_target;
+  if (null_check_branch != nullptr) {
+    null_check_branch->target = slow_path_target;
+  }
+  // TODO: move to a slow path.
+  // Go expensive route - artLockObjectFromCode(obj);
+  LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pLockObject).Int32Value(), rs_rA64_LR);
+  ClobberCallerSave();
+  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
+  MarkSafepointPC(call_inst);
+
+  LIR* success_target = NewLIR0(kPseudoTargetLabel);
+  lock_success_branch->target = success_target;
+  GenMemBarrier(kLoadLoad);
+}
+
+/*
+ * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
+ * details see monitor.cc. Note the code below doesn't use ldxr/stxr as the code holds the lock
+ * and can only give away ownership if it is suspended.
+ */
+void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
+  // x0/w0 = object
+  // w1    = thin lock thread id
+  // w2    = lock word
+  // TUNING: how much performance do we gain by inlining this, given that we have
+  // already flushed all registers?
+  FlushAllRegs();
+  LoadValueDirectFixed(rl_src, rs_w0);  // Get obj
+  LockCallTemps();  // Prepare for explicit register usage
+  LIR* null_check_branch = nullptr;
+  if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
+    null_check_branch = nullptr;  // No null check.
+  } else {
+    // If the null-check fails, it's handled by the slow path to reduce exception-related meta-data.
+    if (Runtime::Current()->ExplicitNullChecks()) {
+      null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
+    }
+  }
+  Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
+  Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
+  MarkPossibleNullPointerException(opt_flags);
+  LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_w1, rs_w2, NULL);
+  GenMemBarrier(kStoreLoad);
+  Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_xzr);
+  LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
+
+  LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+  slow_unlock_branch->target = slow_path_target;
+  if (null_check_branch != nullptr) {
+    null_check_branch->target = slow_path_target;
+  }
+  // TODO: move to a slow path.
+  // Go expensive route - artUnlockObjectFromCode(obj);
+  LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject).Int32Value(), rs_rA64_LR);
+  ClobberCallerSave();
+  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
+  MarkSafepointPC(call_inst);
+
+  LIR* success_target = NewLIR0(kPseudoTargetLabel);
+  unlock_success_branch->target = success_target;
+}
+
+void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) {
+  int ex_offset = Thread::ExceptionOffset<8>().Int32Value();
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  Load32Disp(rs_rA64_SELF, ex_offset, rl_result.reg);
+  Store32Disp(rs_rA64_SELF, ex_offset, rs_xzr);
+  StoreValue(rl_dest, rl_result);
+}
+
+/*
+ * Mark garbage collection card. Skip if the value we're storing is null.
+ */
+void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
+  RegStorage reg_card_base = AllocTemp();
+  RegStorage reg_card_no = AllocTemp();
+  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
+  LoadWordDisp(rs_rA64_SELF, Thread::CardTableOffset<8>().Int32Value(), reg_card_base);
+  OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
+  StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
+  LIR* target = NewLIR0(kPseudoTargetLabel);
+  branch_over->target = target;
+  FreeTemp(reg_card_base);
+  FreeTemp(reg_card_no);
+}
+
+void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
+  /*
+   * On entry, x0, x1, x2 & x3 are live.  Let the register allocation
+   * mechanism know so it doesn't try to use any of them when
+   * expanding the frame or flushing.  This leaves the utility
+   * code with a single temp: x12.  This should be enough.
+   */
+  LockTemp(rs_x0);
+  LockTemp(rs_x1);
+  LockTemp(rs_x2);
+  LockTemp(rs_x3);
+
+  /*
+   * We can safely skip the stack overflow check if we're
+   * a leaf *and* our frame size < fudge factor.
+   */
+  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
+                            (static_cast<size_t>(frame_size_) <
+                            Thread::kStackOverflowReservedBytes));
+  NewLIR0(kPseudoMethodEntry);
+
+  if (!skip_overflow_check) {
+    LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x12);
+    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_);
+    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+      /* Load stack limit */
+      // TODO(Arm64): fix the line below:
+      // GenRegRegCheck(kCondUlt, rA64_SP, r12, kThrowStackOverflow);
+    } else {
+      // Implicit stack overflow check.
+      // Generate a load from [sp, #-framesize].  If this is in the stack
+      // redzone we will get a segmentation fault.
+      // TODO(Arm64): does the following really work or do we need a reg != rA64_ZR?
+      Load32Disp(rs_rA64_SP, 0, rs_wzr);
+      MarkPossibleStackOverflowException();
+    }
+  } else if (frame_size_ > 0) {
+    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_);
+  }
+
+  /* Need to spill any FP regs? */
+  if (fp_spill_mask_) {
+    int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
+    SpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_);
+  }
+
+  /* Spill core callee saves. */
+  if (core_spill_mask_) {
+    int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_;
+    SpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_);
+  }
+
+  FlushIns(ArgLocs, rl_method);
+
+  FreeTemp(rs_x0);
+  FreeTemp(rs_x1);
+  FreeTemp(rs_x2);
+  FreeTemp(rs_x3);
+}
+
+void Arm64Mir2Lir::GenExitSequence() {
+  /*
+   * In the exit path, x0/x1 are live - make sure they aren't
+   * allocated by the register utilities as temps.
+   */
+  LockTemp(rs_x0);
+  LockTemp(rs_x1);
+
+  NewLIR0(kPseudoMethodExit);
+
+  /* Need to restore any FP callee saves? */
+  if (fp_spill_mask_) {
+    int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
+    UnSpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_);
+  }
+  if (core_spill_mask_) {
+    int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_;
+    UnSpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_);
+  }
+
+  OpRegImm64(kOpAdd, rs_rA64_SP, frame_size_);
+  NewLIR0(kA64Ret);
+}
+
+void Arm64Mir2Lir::GenSpecialExitSequence() {
+  NewLIR0(kA64Ret);
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
new file mode 100644
index 0000000..fddbfd7
--- /dev/null
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_
+#define ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_
+
+#include "arm64_lir.h"
+#include "dex/compiler_internals.h"
+
+namespace art {
+
+class Arm64Mir2Lir : public Mir2Lir {
+  public:
+    Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
+
+    // Required for target - codegen helpers.
+    bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
+                            RegLocation rl_dest, int lit);
+    bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
+    LIR* CheckSuspendUsingLoad() OVERRIDE;
+    RegStorage LoadHelper(ThreadOffset<4> offset) OVERRIDE;
+    RegStorage LoadHelper(ThreadOffset<8> offset) OVERRIDE;
+    LIR* LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest,
+                              OpSize size) OVERRIDE;
+    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+                      OpSize size) OVERRIDE;
+    LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
+                         OpSize size) OVERRIDE;
+    LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
+                             RegStorage r_dest, OpSize size) OVERRIDE;
+    LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
+    LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
+    LIR* StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest,
+                               OpSize size) OVERRIDE;
+    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+                       OpSize size) OVERRIDE;
+    LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
+                          OpSize size) OVERRIDE;
+    LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
+                              RegStorage r_src, OpSize size) OVERRIDE;
+    void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
+
+    // Required for target - register utilities.
+    RegStorage TargetReg(SpecialTargetRegister reg);
+    RegStorage GetArgMappingToPhysicalReg(int arg_num);
+    RegLocation GetReturnAlt();
+    RegLocation GetReturnWideAlt();
+    RegLocation LocCReturn();
+    RegLocation LocCReturnDouble();
+    RegLocation LocCReturnFloat();
+    RegLocation LocCReturnWide();
+    uint64_t GetRegMaskCommon(RegStorage reg);
+    void AdjustSpillMask();
+    void ClobberCallerSave();
+    void FreeCallTemps();
+    void LockCallTemps();
+    void MarkPreservedSingle(int v_reg, RegStorage reg);
+    void MarkPreservedDouble(int v_reg, RegStorage reg);
+    void CompilerInitializeRegAlloc();
+
+    // Required for target - miscellaneous.
+    void AssembleLIR();
+    uint32_t LinkFixupInsns(LIR* head_lir, LIR* tail_lir, CodeOffset offset);
+    int AssignInsnOffsets();
+    void AssignOffsets();
+    uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir);
+    void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
+    void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
+    const char* GetTargetInstFmt(int opcode);
+    const char* GetTargetInstName(int opcode);
+    std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
+    uint64_t GetPCUseDefEncoding();
+    uint64_t GetTargetInstFlags(int opcode);
+    int GetInsnSize(LIR* lir);
+    bool IsUnconditionalBranch(LIR* lir);
+
+    // Check support for volatile load/store of a given size.
+    bool SupportsVolatileLoadStore(OpSize size) OVERRIDE;
+    // Get the register class for load/store of a field.
+    RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
+
+    // Required for target - Dalvik-level generators.
+    void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                        RegLocation lr_shift);
+    void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                           RegLocation rl_src1, RegLocation rl_src2);
+    void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
+                     RegLocation rl_index, RegLocation rl_dest, int scale);
+    void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
+                     RegLocation rl_src, int scale, bool card_mark);
+    void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                           RegLocation rl_src1, RegLocation rl_shift);
+    void GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                    RegLocation rl_src2);
+    void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                    RegLocation rl_src2);
+    void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                    RegLocation rl_src2);
+    void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                          RegLocation rl_src2);
+    void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                         RegLocation rl_src2);
+    void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                  RegLocation rl_src2);
+    void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
+    bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
+    bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
+    bool GenInlinedSqrt(CallInfo* info);
+    bool GenInlinedPeek(CallInfo* info, OpSize size);
+    bool GenInlinedPoke(CallInfo* info, OpSize size);
+    void GenIntToLong(RegLocation rl_dest, RegLocation rl_src);
+    void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
+    void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
+    void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                   RegLocation rl_src2);
+    void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                    RegLocation rl_src2);
+    void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                    RegLocation rl_src2);
+    void GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                       RegLocation rl_src2, bool is_div);
+    RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
+    void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenDivZeroCheckWide(RegStorage reg);
+    void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method);
+    void GenExitSequence();
+    void GenSpecialExitSequence();
+    void GenFillArrayData(DexOffset table_offset, RegLocation rl_src);
+    void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
+    void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
+    void GenSelect(BasicBlock* bb, MIR* mir);
+    bool GenMemBarrier(MemBarrierKind barrier_kind);
+    void GenMonitorEnter(int opt_flags, RegLocation rl_src);
+    void GenMonitorExit(int opt_flags, RegLocation rl_src);
+    void GenMoveException(RegLocation rl_dest);
+    void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
+                                       int first_bit, int second_bit);
+    void GenNegDouble(RegLocation rl_dest, RegLocation rl_src);
+    void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
+    void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+    void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+    bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
+
+    uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2);
+    void UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask);
+    void SpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask);
+    void UnSpillFPRegs(RegStorage base, int offset, uint32_t reg_mask);
+    void SpillFPRegs(RegStorage base, int offset, uint32_t reg_mask);
+
+    // Required for target - single operation generators.
+    LIR* OpUnconditionalBranch(LIR* target);
+    LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target);
+    LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target);
+    LIR* OpCondBranch(ConditionCode cc, LIR* target);
+    LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target);
+    LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src);
+    LIR* OpIT(ConditionCode cond, const char* guide);
+    void OpEndIT(LIR* it);
+    LIR* OpMem(OpKind op, RegStorage r_base, int disp);
+    LIR* OpPcRelLoad(RegStorage reg, LIR* target);
+    LIR* OpReg(OpKind op, RegStorage r_dest_src);
+    void OpRegCopy(RegStorage r_dest, RegStorage r_src);
+    LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
+    LIR* OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value);
+    LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value);
+    LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset);
+    LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2);
+    LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type);
+    LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type);
+    LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src);
+    LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value);
+    LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2);
+    LIR* OpTestSuspend(LIR* target);
+    LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) OVERRIDE;
+    LIR* OpThreadMem(OpKind op, ThreadOffset<8> thread_offset) OVERRIDE;
+    LIR* OpVldm(RegStorage r_base, int count);
+    LIR* OpVstm(RegStorage r_base, int count);
+    void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset);
+    void OpRegCopyWide(RegStorage dest, RegStorage src);
+    void OpTlsCmp(ThreadOffset<4> offset, int val) OVERRIDE;
+    void OpTlsCmp(ThreadOffset<8> offset, int val) OVERRIDE;
+
+    LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size);
+    LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
+    LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
+                          int shift);
+    LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
+    static const ArmEncodingMap EncodingMap[kA64Last];
+    int EncodeShift(int code, int amount);
+    int EncodeExtend(int extend_type, int amount);
+    bool IsExtendEncoding(int encoded_value);
+    int EncodeLogicalImmediate(bool is_wide, uint64_t value);
+    uint64_t DecodeLogicalImmediate(bool is_wide, int value);
+
+    ArmConditionCode ArmConditionEncoding(ConditionCode code);
+    bool InexpensiveConstantInt(int32_t value);
+    bool InexpensiveConstantFloat(int32_t value);
+    bool InexpensiveConstantLong(int64_t value);
+    bool InexpensiveConstantDouble(int64_t value);
+
+    void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
+    int LoadArgRegs(CallInfo* info, int call_state,
+                    NextCallInsn next_call_insn,
+                    const MethodReference& target_method,
+                    uint32_t vtable_idx,
+                    uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+                    bool skip_this);
+
+  private:
+    void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
+                                  ConditionCode ccode);
+    LIR* LoadFPConstantValue(int r_dest, int32_t value);
+    LIR* LoadFPConstantValueWide(int r_dest, int64_t value);
+    void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
+    void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
+    void AssignDataOffsets();
+    RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+                          bool is_div, bool check_zero);
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
new file mode 100644
index 0000000..87ab6fe
--- /dev/null
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -0,0 +1,322 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm64_lir.h"
+#include "codegen_arm64.h"
+#include "dex/quick/mir_to_lir-inl.h"
+
+namespace art {
+
+void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
+                                   RegLocation rl_src1, RegLocation rl_src2) {
+  int op = kA64Brk1d;
+  RegLocation rl_result;
+
+  /*
+   * Don't attempt to optimize register usage since these opcodes call out to
+   * the handlers.
+   */
+  switch (opcode) {
+    case Instruction::ADD_FLOAT_2ADDR:
+    case Instruction::ADD_FLOAT:
+      op = kA64Fadd3fff;
+      break;
+    case Instruction::SUB_FLOAT_2ADDR:
+    case Instruction::SUB_FLOAT:
+      op = kA64Fsub3fff;
+      break;
+    case Instruction::DIV_FLOAT_2ADDR:
+    case Instruction::DIV_FLOAT:
+      op = kA64Fdiv3fff;
+      break;
+    case Instruction::MUL_FLOAT_2ADDR:
+    case Instruction::MUL_FLOAT:
+      op = kA64Fmul3fff;
+      break;
+    case Instruction::REM_FLOAT_2ADDR:
+    case Instruction::REM_FLOAT:
+      FlushAllRegs();   // Send everything to home location
+      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2,
+                                              false);
+      rl_result = GetReturn(true);
+      StoreValue(rl_dest, rl_result);
+      return;
+    case Instruction::NEG_FLOAT:
+      GenNegFloat(rl_dest, rl_src1);
+      return;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << opcode;
+  }
+  rl_src1 = LoadValue(rl_src1, kFPReg);
+  rl_src2 = LoadValue(rl_src2, kFPReg);
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  StoreValue(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
+                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
+  int op = kA64Brk1d;
+  RegLocation rl_result;
+
+  switch (opcode) {
+    case Instruction::ADD_DOUBLE_2ADDR:
+    case Instruction::ADD_DOUBLE:
+      op = kA64Fadd3fff;
+      break;
+    case Instruction::SUB_DOUBLE_2ADDR:
+    case Instruction::SUB_DOUBLE:
+      op = kA64Fsub3fff;
+      break;
+    case Instruction::DIV_DOUBLE_2ADDR:
+    case Instruction::DIV_DOUBLE:
+      op = kA64Fdiv3fff;
+      break;
+    case Instruction::MUL_DOUBLE_2ADDR:
+    case Instruction::MUL_DOUBLE:
+      op = kA64Fmul3fff;
+      break;
+    case Instruction::REM_DOUBLE_2ADDR:
+    case Instruction::REM_DOUBLE:
+      FlushAllRegs();   // Send everything to home location
+      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmod), rl_src1, rl_src2,
+                                              false);
+      rl_result = GetReturnWide(true);
+      StoreValueWide(rl_dest, rl_result);
+      return;
+    case Instruction::NEG_DOUBLE:
+      GenNegDouble(rl_dest, rl_src1);
+      return;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << opcode;
+  }
+
+  rl_src1 = LoadValueWide(rl_src1, kFPReg);
+  DCHECK(rl_src1.wide);
+  rl_src2 = LoadValueWide(rl_src2, kFPReg);
+  DCHECK(rl_src2.wide);
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  DCHECK(rl_dest.wide);
+  DCHECK(rl_result.wide);
+  NewLIR3(FWIDE(op), rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenConversion(Instruction::Code opcode,
+                                 RegLocation rl_dest, RegLocation rl_src) {
+  int op = kA64Brk1d;
+  RegLocation rl_result;
+
+  switch (opcode) {
+    case Instruction::INT_TO_FLOAT:
+      op = kA64Scvtf2fw;
+      break;
+    case Instruction::FLOAT_TO_INT:
+      op = kA64Fcvtzs2wf;
+      break;
+    case Instruction::DOUBLE_TO_FLOAT:
+      op = kA64Fcvt2sS;
+      break;
+    case Instruction::FLOAT_TO_DOUBLE:
+      op = kA64Fcvt2Ss;
+      break;
+    case Instruction::INT_TO_DOUBLE:
+      op = FWIDE(kA64Scvtf2fw);
+      break;
+    case Instruction::DOUBLE_TO_INT:
+      op = FWIDE(kA64Fcvtzs2wf);
+      break;
+    case Instruction::LONG_TO_DOUBLE:
+      op = FWIDE(kA64Scvtf2fx);
+      break;
+    case Instruction::FLOAT_TO_LONG:
+      op = kA64Fcvtzs2xf;
+      break;
+    case Instruction::LONG_TO_FLOAT:
+      op = kA64Scvtf2fx;
+      break;
+    case Instruction::DOUBLE_TO_LONG:
+      op = FWIDE(kA64Fcvtzs2xf);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << opcode;
+  }
+
+  if (rl_src.wide) {
+    rl_src = LoadValueWide(rl_src, kFPReg);
+  } else {
+    rl_src = LoadValue(rl_src, kFPReg);
+  }
+
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+
+  if (rl_dest.wide) {
+    StoreValueWide(rl_dest, rl_result);
+  } else {
+    StoreValue(rl_dest, rl_result);
+  }
+}
+
+void Arm64Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
+                                     bool is_double) {
+  LIR* target = &block_label_list_[bb->taken];
+  RegLocation rl_src1;
+  RegLocation rl_src2;
+  if (is_double) {
+    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
+    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
+    rl_src1 = LoadValueWide(rl_src1, kFPReg);
+    rl_src2 = LoadValueWide(rl_src2, kFPReg);
+    NewLIR2(FWIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  } else {
+    rl_src1 = mir_graph_->GetSrc(mir, 0);
+    rl_src2 = mir_graph_->GetSrc(mir, 1);
+    rl_src1 = LoadValue(rl_src1, kFPReg);
+    rl_src2 = LoadValue(rl_src2, kFPReg);
+    NewLIR2(kA64Fcmp2ff, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  }
+  ConditionCode ccode = mir->meta.ccode;
+  switch (ccode) {
+    case kCondEq:
+    case kCondNe:
+      break;
+    case kCondLt:
+      if (gt_bias) {
+        ccode = kCondMi;
+      }
+      break;
+    case kCondLe:
+      if (gt_bias) {
+        ccode = kCondLs;
+      }
+      break;
+    case kCondGt:
+      if (gt_bias) {
+        ccode = kCondHi;
+      }
+      break;
+    case kCondGe:
+      if (gt_bias) {
+        ccode = kCondUge;
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unexpected ccode: " << ccode;
+  }
+  OpCondBranch(ccode, target);
+}
+
+
+void Arm64Mir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
+  bool is_double = false;
+  int default_result = -1;
+  RegLocation rl_result;
+
+  switch (opcode) {
+    case Instruction::CMPL_FLOAT:
+      is_double = false;
+      default_result = -1;
+      break;
+    case Instruction::CMPG_FLOAT:
+      is_double = false;
+      default_result = 1;
+      break;
+    case Instruction::CMPL_DOUBLE:
+      is_double = true;
+      default_result = -1;
+      break;
+    case Instruction::CMPG_DOUBLE:
+      is_double = true;
+      default_result = 1;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << opcode;
+  }
+  if (is_double) {
+    rl_src1 = LoadValueWide(rl_src1, kFPReg);
+    rl_src2 = LoadValueWide(rl_src2, kFPReg);
+    // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc()
+    ClobberSReg(rl_dest.s_reg_low);
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    LoadConstant(rl_result.reg, default_result);
+    NewLIR2(FWIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  } else {
+    rl_src1 = LoadValue(rl_src1, kFPReg);
+    rl_src2 = LoadValue(rl_src2, kFPReg);
+    // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc()
+    ClobberSReg(rl_dest.s_reg_low);
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    LoadConstant(rl_result.reg, default_result);
+    NewLIR2(kA64Fcmp2ff, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  }
+  DCHECK(!rl_result.reg.IsFloat());
+
+  // TODO(Arm64): should we rather do this?
+  // csinc wD, wzr, wzr, eq
+  // csneg wD, wD, wD, le
+  // (which requires 2 instructions rather than 3)
+
+  // Rd = if cond then Rd else -Rd.
+  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
+          rl_result.reg.GetReg(), (default_result == 1) ? kArmCondPl : kArmCondLe);
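+  // Rd = 0 if the comparison was equal, else keep the +/-default_result set above.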
+  NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rwzr, rl_result.reg.GetReg(),
+          kArmCondEq);
+  StoreValue(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
+  RegLocation rl_result;
+  rl_src = LoadValue(rl_src, kFPReg);
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR2(kA64Fneg2ff, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  StoreValue(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
+  RegLocation rl_result;
+  rl_src = LoadValueWide(rl_src, kFPReg);
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR2(FWIDE(kA64Fneg2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  StoreValueWide(rl_dest, rl_result);
+}
+
+bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(FATAL) << "GenInlinedSqrt not implemented for Arm64";
+
+  DCHECK_EQ(cu_->instruction_set, kArm64);
+  LIR *branch;
+  RegLocation rl_src = info->args[0];
+  RegLocation rl_dest = InlineTargetWide(info);  // double place for result
+  rl_src = LoadValueWide(rl_src, kFPReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR2(FWIDE(kA64Fsqrt2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  NewLIR2(FWIDE(kA64Fcmp2ff), rl_result.reg.GetReg(), rl_result.reg.GetReg());
+  branch = NewLIR2(kA64B2ct, kArmCondEq, 0);
+  ClobberCallerSave();
+  LockCallTemps();  // Using fixed registers
+  RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(8, pSqrt));
+  // NewLIR3(kThumb2Fmrrd, r0, r1, rl_src.reg.GetReg());
+  NewLIR1(kA64Blr1x, r_tgt.GetReg());
+  // NewLIR3(kThumb2Fmdrr, rl_result.reg.GetReg(), r0, r1);
+  branch->target = NewLIR0(kPseudoTargetLabel);
+  StoreValueWide(rl_dest, rl_result);
+  return true;
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
new file mode 100644
index 0000000..38f110e
--- /dev/null
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -0,0 +1,1228 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This file contains codegen for the Arm64 ISA. */
+
+#include "arm64_lir.h"
+#include "codegen_arm64.h"
+#include "dex/quick/mir_to_lir-inl.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "mirror/array.h"
+
+namespace art {
+
+LIR* Arm64Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
+  OpRegReg(kOpCmp, src1, src2);
+  return OpCondBranch(cond, target);
+}
+
+// TODO(Arm64): remove this.
+LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode, const char* guide) {
+  LOG(FATAL) << "Unexpected use of OpIT for Arm64";
+  return NULL;
+}
+
+void Arm64Mir2Lir::OpEndIT(LIR* it) {
+  LOG(FATAL) << "Unexpected use of OpEndIT for Arm64";
+}
+
+/*
+ * 64-bit 3way compare function.
+ *     cmp   xA, xB
+ *     csinc wC, wzr, wzr, eq
+ *     csneg wC, wC, wC, le
+ */
+void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
+                              RegLocation rl_src2) {
+  RegLocation rl_result;
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+  rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
+  NewLIR4(WIDE(kA64Csinc4rrrc), rl_result.reg.GetReg(), rxzr, rxzr, kArmCondEq);
+  NewLIR4(WIDE(kA64Csneg4rrrc), rl_result.reg.GetReg(), rl_result.reg.GetReg(),
+          rl_result.reg.GetReg(), kArmCondLe);
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_shift) {
+  OpKind op = kOpBkpt;
+  switch (opcode) {
+  case Instruction::SHL_LONG:
+  case Instruction::SHL_LONG_2ADDR:
+    op = kOpLsl;
+    break;
+  case Instruction::SHR_LONG:
+  case Instruction::SHR_LONG_2ADDR:
+    op = kOpAsr;
+    break;
+  case Instruction::USHR_LONG:
+  case Instruction::USHR_LONG_2ADDR:
+    op = kOpLsr;
+    break;
+  default:
+    LOG(FATAL) << "Unexpected case: " << opcode;
+  }
+  rl_shift = LoadValueWide(rl_shift, kCoreReg);
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  OpRegRegReg(op, rl_result.reg, rl_src1.reg, rl_shift.reg);
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
+                                            int64_t val, ConditionCode ccode) {
+  LIR* taken = &block_label_list_[bb->taken];
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+
+  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
+    ArmOpcode opcode = (ccode == kCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
+    LIR* branch = NewLIR2(WIDE(opcode), rl_src1.reg.GetLowReg(), 0);
+    branch->target = taken;
+  } else {
+    OpRegImm64(kOpCmp, rl_src1.reg, val);
+    OpCondBranch(ccode, taken);
+  }
+}
+
+void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(FATAL);
+
+  RegLocation rl_result;
+  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
+  RegLocation rl_dest = mir_graph_->GetDest(mir);
+  rl_src = LoadValue(rl_src, kCoreReg);
+  ConditionCode ccode = mir->meta.ccode;
+  if (mir->ssa_rep->num_uses == 1) {
+    // CONST case
+    int true_val = mir->dalvikInsn.vB;
+    int false_val = mir->dalvikInsn.vC;
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    // Change kCondNe to kCondEq for the special cases below.
+    if (ccode == kCondNe) {
+      ccode = kCondEq;
+      std::swap(true_val, false_val);
+    }
+    bool cheap_false_val = InexpensiveConstantInt(false_val);
+    if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) {
+      OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val);
+      DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+      OpIT(true_val == 0 ? kCondNe : kCondUge, "");
+      LoadConstant(rl_result.reg, false_val);
+      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
+    } else if (cheap_false_val && ccode == kCondEq && true_val == 1) {
+      OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1);
+      DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+      OpIT(kCondLs, "");
+      LoadConstant(rl_result.reg, false_val);
+      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
+    } else if (cheap_false_val && InexpensiveConstantInt(true_val)) {
+      OpRegImm(kOpCmp, rl_src.reg, 0);
+      OpIT(ccode, "E");
+      LoadConstant(rl_result.reg, true_val);
+      LoadConstant(rl_result.reg, false_val);
+      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
+    } else {
+      // Unlikely case - could be tuned.
+      RegStorage t_reg1 = AllocTemp();
+      RegStorage t_reg2 = AllocTemp();
+      LoadConstant(t_reg1, true_val);
+      LoadConstant(t_reg2, false_val);
+      OpRegImm(kOpCmp, rl_src.reg, 0);
+      OpIT(ccode, "E");
+      OpRegCopy(rl_result.reg, t_reg1);
+      OpRegCopy(rl_result.reg, t_reg2);
+      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
+    }
+  } else {
+    // MOVE case
+    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
+    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
+    rl_true = LoadValue(rl_true, kCoreReg);
+    rl_false = LoadValue(rl_false, kCoreReg);
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    OpRegImm(kOpCmp, rl_src.reg, 0);
+    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {  // Is the "true" case already in place?
+      OpIT(NegateComparison(ccode), "");
+      OpRegCopy(rl_result.reg, rl_false.reg);
+    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {  // False case in place?
+      OpIT(ccode, "");
+      OpRegCopy(rl_result.reg, rl_true.reg);
+    } else {  // Normal - select between the two.
+      OpIT(ccode, "E");
+      OpRegCopy(rl_result.reg, rl_true.reg);
+      OpRegCopy(rl_result.reg, rl_false.reg);
+    }
+    GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
+  }
+  StoreValue(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(FATAL);
+
+  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
+  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
+  // Normalize such that if either operand is constant, src2 will be constant.
+  ConditionCode ccode = mir->meta.ccode;
+  if (rl_src1.is_const) {
+    std::swap(rl_src1, rl_src2);
+    ccode = FlipComparisonOrder(ccode);
+  }
+  if (rl_src2.is_const) {
+    RegLocation rl_temp = UpdateLocWide(rl_src2);
+    // Do special compare/branch against simple const operand if not already in registers.
+    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+    if ((rl_temp.location != kLocPhysReg)
+     /*&& ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))*/) {
+      GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
+      return;
+    }
+  }
+  LIR* taken = &block_label_list_[bb->taken];
+  LIR* not_taken = &block_label_list_[bb->fall_through];
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+  OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
+  switch (ccode) {
+    case kCondEq:
+      OpCondBranch(kCondNe, not_taken);
+      break;
+    case kCondNe:
+      OpCondBranch(kCondNe, taken);
+      break;
+    case kCondLt:
+      OpCondBranch(kCondLt, taken);
+      OpCondBranch(kCondGt, not_taken);
+      ccode = kCondUlt;
+      break;
+    case kCondLe:
+      OpCondBranch(kCondLt, taken);
+      OpCondBranch(kCondGt, not_taken);
+      ccode = kCondLs;
+      break;
+    case kCondGt:
+      OpCondBranch(kCondGt, taken);
+      OpCondBranch(kCondLt, not_taken);
+      ccode = kCondHi;
+      break;
+    case kCondGe:
+      OpCondBranch(kCondGt, taken);
+      OpCondBranch(kCondLt, not_taken);
+      ccode = kCondUge;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected ccode: " << ccode;
+  }
+  OpRegReg(kOpCmp, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
+  OpCondBranch(ccode, taken);
+}
+
+/*
+ * Generate a register comparison to an immediate and branch.  Caller
+ * is responsible for setting branch target field.
+ */
+LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value,
+                                  LIR* target) {
+  LIR* branch;
+  ArmConditionCode arm_cond = ArmConditionEncoding(cond);
+  if (check_value == 0 && (arm_cond == kArmCondEq || arm_cond == kArmCondNe)) {
+    ArmOpcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
+    ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
+    branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
+  } else {
+    OpRegImm(kOpCmp, reg, check_value);
+    branch = NewLIR2(kA64B2ct, arm_cond, 0);
+  }
+  branch->target = target;
+  return branch;
+}
+
+LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
+  bool dest_is_fp = r_dest.IsFloat();
+  bool src_is_fp = r_src.IsFloat();
+  ArmOpcode opcode = kA64Brk1d;
+  LIR* res;
+
+  if (LIKELY(dest_is_fp == src_is_fp)) {
+    if (LIKELY(!dest_is_fp)) {
+      // Core/core copy.
+      // Copies involving the sp register require a different instruction.
+      opcode = UNLIKELY(A64_REG_IS_SP(r_dest.GetReg())) ? kA64Add4RRdT : kA64Mov2rr;
+
+      // TODO(Arm64): kA64Add4RRdT formally has 4 args, but is used as a 2-arg instruction.
+      //   This currently works because the other arguments default to 0. We should instead
+      //   introduce an alias kA64Mov2RR.
+
+      // Core/core copy: do an x/x copy only if both registers are x registers.
+      if (r_dest.Is64Bit() && r_src.Is64Bit()) {
+        opcode = WIDE(opcode);
+      }
+    } else {
+      // Float/float copy.
+      bool dest_is_double = r_dest.IsDouble();
+      bool src_is_double = r_src.IsDouble();
+
+      // We do not do float/double or double/float casts here.
+      DCHECK_EQ(dest_is_double, src_is_double);
+
+      // Homogeneous float/float copy.
+      opcode = (dest_is_double) ? FWIDE(kA64Fmov2ff) : kA64Fmov2ff;
+    }
+  } else {
+    // Inhomogeneous register copy.
+    if (dest_is_fp) {
+      if (r_dest.IsDouble()) {
+        opcode = kA64Fmov2Sx;
+      } else {
+        DCHECK(r_src.IsSingle());
+        opcode = kA64Fmov2sw;
+      }
+    } else {
+      if (r_src.IsDouble()) {
+        opcode = kA64Fmov2xS;
+      } else {
+        DCHECK(r_dest.Is32Bit());
+        opcode = kA64Fmov2ws;
+      }
+    }
+  }
+
+  res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
+
+  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
+    res->flags.is_nop = true;
+  }
+
+  return res;
+}
+
+void Arm64Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
+  if (r_dest != r_src) {
+    LIR* res = OpRegCopyNoInsert(r_dest, r_src);
+    AppendLIR(res);
+  }
+}
+
+void Arm64Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
+  OpRegCopy(r_dest, r_src);
+}
+
+// Table of magic divisors
+struct MagicTable {
+  uint32_t magic;
+  uint32_t shift;
+  DividePattern pattern;
+};
+
+static const MagicTable magic_table[] = {
+  {0, 0, DivideNone},        // 0
+  {0, 0, DivideNone},        // 1
+  {0, 0, DivideNone},        // 2
+  {0x55555556, 0, Divide3},  // 3
+  {0, 0, DivideNone},        // 4
+  {0x66666667, 1, Divide5},  // 5
+  {0x2AAAAAAB, 0, Divide3},  // 6
+  {0x92492493, 2, Divide7},  // 7
+  {0, 0, DivideNone},        // 8
+  {0x38E38E39, 1, Divide5},  // 9
+  {0x66666667, 2, Divide5},  // 10
+  {0x2E8BA2E9, 1, Divide5},  // 11
+  {0x2AAAAAAB, 1, Divide5},  // 12
+  {0x4EC4EC4F, 2, Divide5},  // 13
+  {0x92492493, 3, Divide7},  // 14
+  {0x88888889, 3, Divide7},  // 15
+};
+
+// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
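+// For example (illustrative): for division by 3 the magic constant is 0x55555556, i.e.
+// ceil(2^32 / 3). The high 32 bits of src * magic approximate src / 3, and the Divide3
+// pattern corrects negative inputs by subtracting (src >> 31) from that quotient.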
+bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
+                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
+  // TODO(Arm64): fix this for Arm64. Note: may be worth revisiting the magic table.
+  //   It should be possible to subtract one from all its entries and use smaddl
+  //   to counteract this. The advantage is that the constants should then be easier
+  //   to encode as logical immediates (0x55555555 rather than 0x55555556).
+  UNIMPLEMENTED(FATAL);
+
+  if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) {
+    return false;
+  }
+  DividePattern pattern = magic_table[lit].pattern;
+  if (pattern == DivideNone) {
+    return false;
+  }
+  // Tuning: add rem patterns
+  if (!is_div) {
+    return false;
+  }
+
+  RegStorage r_magic = AllocTemp();
+  LoadConstant(r_magic, magic_table[lit].magic);
+  rl_src = LoadValue(rl_src, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegStorage r_hi = AllocTemp();
+  RegStorage r_lo = AllocTemp();
+  NewLIR4(kA64Smaddl4xwwx, r_lo.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg(), rxzr);
+  switch (pattern) {
+    case Divide3:
+      OpRegRegRegShift(kOpSub, rl_result.reg, r_hi, rl_src.reg, EncodeShift(kA64Asr, 31));
+      break;
+    case Divide5:
+      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
+      OpRegRegRegShift(kOpRsub, rl_result.reg, r_lo, r_hi, EncodeShift(kA64Asr, magic_table[lit].shift));
+      break;
+    case Divide7:
+      OpRegReg(kOpAdd, r_hi, rl_src.reg);
+      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
+      OpRegRegRegShift(kOpRsub, rl_result.reg, r_lo, r_hi, EncodeShift(kA64Asr, magic_table[lit].shift));
+      break;
+    default:
+      LOG(FATAL) << "Unexpected pattern: " << pattern;
+  }
+  StoreValue(rl_dest, rl_result);
+  return true;
+}
+
+bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
+  LOG(FATAL) << "Unexpected use of EasyMultiply for Arm64";
+  return false;
+}
+
+RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                      RegLocation rl_src2, bool is_div, bool check_zero) {
+  LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
+  return rl_dest;
+}
+
+RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
+  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm64";
+  return rl_dest;
+}
+
+RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+  // Put the literal in a temp.
+  RegStorage lit_temp = AllocTemp();
+  LoadConstant(lit_temp, lit);
+  // Use the generic case for div/rem with arg2 in a register.
+  // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
+  rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
+  FreeTemp(lit_temp);
+
+  return rl_result;
+}
+
+RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage r_src1, RegStorage r_src2,
+                                  bool is_div) {
+  CHECK_EQ(r_src1.Is64Bit(), r_src2.Is64Bit());
+
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  if (is_div) {
+    OpRegRegReg(kOpDiv, rl_result.reg, r_src1, r_src2);
+  } else {
+    // temp = r_src1 / r_src2
+    // dest = r_src1 - temp * r_src2
+    RegStorage temp;
+    ArmOpcode wide;
+    if (rl_result.reg.Is64Bit()) {
+      temp = AllocTempWide();
+      wide = WIDE(0);
+    } else {
+      temp = AllocTemp();
+      wide = UNWIDE(0);
+    }
+    OpRegRegReg(kOpDiv, temp, r_src1, r_src2);
+    NewLIR4(kA64Msub4rrrr | wide, rl_result.reg.GetReg(), temp.GetReg(),
+            r_src1.GetReg(), r_src2.GetReg());
+    FreeTemp(temp);
+  }
+  return rl_result;
+}
+
+bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(FATAL);
+
+  DCHECK_EQ(cu_->instruction_set, kThumb2);
+  RegLocation rl_src1 = info->args[0];
+  RegLocation rl_src2 = info->args[1];
+  rl_src1 = LoadValue(rl_src1, kCoreReg);
+  rl_src2 = LoadValue(rl_src2, kCoreReg);
+  RegLocation rl_dest = InlineTarget(info);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
+  OpIT((is_min) ? kCondGt : kCondLt, "E");
+  OpRegReg(kOpMov, rl_result.reg, rl_src2.reg);
+  OpRegReg(kOpMov, rl_result.reg, rl_src1.reg);
+  GenBarrier();
+  StoreValue(rl_dest, rl_result);
+  return true;
+}
+
+bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(WARNING);
+
+  RegLocation rl_src_address = info->args[0];  // long address
+  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
+  RegLocation rl_dest = InlineTarget(info);
+  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  if (size == k64) {
+    // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0.
+    if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) {
+      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
+      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
+    } else {
+      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
+      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
+    }
+    StoreValueWide(rl_dest, rl_result);
+  } else {
+    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
+    // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
+    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size);
+    StoreValue(rl_dest, rl_result);
+  }
+  return true;
+}
+
+bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(WARNING);
+
+  RegLocation rl_src_address = info->args[0];  // long address
+  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
+  RegLocation rl_src_value = info->args[2];  // [size] value
+  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
+  if (size == k64) {
+    // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0.
+    RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
+    StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), k32);
+    StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), k32);
+  } else {
+    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
+    // Unaligned store with STR and STRSH is allowed on ARMv7 with SCTLR.A set to 0.
+    RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
+    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size);
+  }
+  return true;
+}
+
+void Arm64Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
+  LOG(FATAL) << "Unexpected use of OpLea for Arm64";
+}
+
+void Arm64Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
+  UNIMPLEMENTED(FATAL) << "Should not be used.";
+}
+
+void Arm64Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
+  LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm64";
+}
+
+bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
+  // TODO(Arm64): implement this.
+  UNIMPLEMENTED(WARNING);
+
+  DCHECK_EQ(cu_->instruction_set, kThumb2);
+  // Unused - RegLocation rl_src_unsafe = info->args[0];
+  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
+  RegLocation rl_src_offset = info->args[2];  // long low
+  rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
+  RegLocation rl_src_expected = info->args[4];  // int, long or Object
+  // If is_long, high half is in info->args[5]
+  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
+  // If is_long, high half is in info->args[7]
+  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
+
+  // We have only 5 temporary registers available and actually only 4 if the InlineTarget
+  // above locked one of the temps. For a straightforward CAS64 we need 7 registers:
+  // r_ptr (1), new_value (2), expected(2) and ldrexd result (2). If neither expected nor
+  // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop
+  // into the same temps, reducing the number of required temps down to 5. We shall work
+  // around the potentially locked temp by using LR for r_ptr, unconditionally.
+  // TODO: Pass information about the need for more temps to the stack frame generation
+  // code so that we can rely on being able to allocate enough temps.
+  DCHECK(!GetRegInfo(rs_rA64_LR)->IsTemp());
+  MarkTemp(rs_rA64_LR);
+  FreeTemp(rs_rA64_LR);
+  LockTemp(rs_rA64_LR);
+  bool load_early = true;
+  if (is_long) {
+    RegStorage expected_reg = rl_src_expected.reg.IsPair() ? rl_src_expected.reg.GetLow() :
+        rl_src_expected.reg;
+    RegStorage new_val_reg = rl_src_new_value.reg.IsPair() ? rl_src_new_value.reg.GetLow() :
+        rl_src_new_value.reg;
+    bool expected_is_core_reg = rl_src_expected.location == kLocPhysReg && !expected_reg.IsFloat();
+    bool new_value_is_core_reg = rl_src_new_value.location == kLocPhysReg && !new_val_reg.IsFloat();
+    bool expected_is_good_reg = expected_is_core_reg && !IsTemp(expected_reg);
+    bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(new_val_reg);
+
+    if (!expected_is_good_reg && !new_value_is_good_reg) {
+      // None of expected/new_value is non-temp reg, need to load both late
+      load_early = false;
+      // Make sure they are not in the temp regs and the load will not be skipped.
+      if (expected_is_core_reg) {
+        FlushRegWide(rl_src_expected.reg);
+        ClobberSReg(rl_src_expected.s_reg_low);
+        ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low));
+        rl_src_expected.location = kLocDalvikFrame;
+      }
+      if (new_value_is_core_reg) {
+        FlushRegWide(rl_src_new_value.reg);
+        ClobberSReg(rl_src_new_value.s_reg_low);
+        ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low));
+        rl_src_new_value.location = kLocDalvikFrame;
+      }
+    }
+  }
+
+  // Release store semantics, get the barrier out of the way.  TODO: revisit
+  GenMemBarrier(kStoreLoad);
+
+  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
+  RegLocation rl_new_value;
+  if (!is_long) {
+    rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
+  } else if (load_early) {
+    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
+  }
+
+  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
+    // Mark card for object assuming new value is stored.
+    MarkGCCard(rl_new_value.reg, rl_object.reg);
+  }
+
+  RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
+
+  RegStorage r_ptr = rs_rA64_LR;
+  OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);
+
+  // Free now unneeded rl_object and rl_offset to give more temps.
+  ClobberSReg(rl_object.s_reg_low);
+  FreeTemp(rl_object.reg);
+  ClobberSReg(rl_offset.s_reg_low);
+  FreeTemp(rl_offset.reg);
+
+  RegLocation rl_expected;
+  if (!is_long) {
+    rl_expected = LoadValue(rl_src_expected, kCoreReg);
+  } else if (load_early) {
+    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
+  } else {
+    // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs.
+    int low_reg = AllocTemp().GetReg();
+    int high_reg = AllocTemp().GetReg();
+    rl_new_value.reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg);
+    rl_expected = rl_new_value;
+  }
+
+  // do {
+  //   tmp = [r_ptr] - expected;
+  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
+  // result = tmp != 0;
+
+  RegStorage r_tmp = AllocTemp();
+  LIR* target = NewLIR0(kPseudoTargetLabel);
+
+  if (is_long) {
+    RegStorage r_tmp_high = AllocTemp();
+    if (!load_early) {
+      LoadValueDirectWide(rl_src_expected, rl_expected.reg);
+    }
+    NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg());
+    OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow());
+    OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh());
+    if (!load_early) {
+      LoadValueDirectWide(rl_src_new_value, rl_new_value.reg);
+    }
+
+    LIR* branch1 = OpCmpImmBranch(kCondNe, r_tmp, 0, NULL);
+    LIR* branch2 = OpCmpImmBranch(kCondNe, r_tmp_high, 0, NULL);
+    NewLIR4(WIDE(kA64Stxr3wrX) /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(),
+            rl_new_value.reg.GetHighReg(), r_ptr.GetReg());
+    LIR* target2 = NewLIR0(kPseudoTargetLabel);
+    branch1->target = target2;
+    branch2->target = target2;
+    FreeTemp(r_tmp_high);  // Now unneeded
+
+  } else {
+    NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_ptr.GetReg(), 0);
+    OpRegReg(kOpSub, r_tmp, rl_expected.reg);
+    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+    OpIT(kCondEq, "T");
+    NewLIR4(kA64Stxr3wrX /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0);
+  }
+
+  // Still one conditional left from OpIT(kCondEq, "T") from either branch
+  OpRegImm(kOpCmp /* eq */, r_tmp, 1);
+  OpCondBranch(kCondEq, target);
+
+  if (!load_early) {
+    FreeTemp(rl_expected.reg);  // Now unneeded.
+  }
+
+  // result := (tmp1 != 0) ? 0 : 1;
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1);
+  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+  OpIT(kCondUlt, "");
+  LoadConstant(rl_result.reg, 0); /* cc */
+  FreeTemp(r_tmp);  // Now unneeded.
+
+  StoreValue(rl_dest, rl_result);
+
+  // Now, restore lr to its non-temp status.
+  Clobber(rs_rA64_LR);
+  UnmarkTemp(rs_rA64_LR);
+  return true;
+}
+
+LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+  return RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), reg.GetReg(), 0, 0, 0, 0, target);
+}
+
+LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
+  LOG(FATAL) << "Unexpected use of OpVldm for Arm64";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base, int count) {
+  LOG(FATAL) << "Unexpected use of OpVstm for Arm64";
+  return NULL;
+}
+
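+// Multiply rl_src by a literal with exactly two bits set (at positions first_bit and
+// second_bit): result = (src + (src << (second_bit - first_bit))) << first_bit.
+// For example (illustrative), lit == 10 (bits 1 and 3): (src + (src << 2)) << 1 == src * 10.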
+void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
+                                               RegLocation rl_result, int lit,
+                                               int first_bit, int second_bit) {
+  OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg, EncodeShift(kA64Lsl, second_bit - first_bit));
+  if (first_bit != 0) {
+    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
+  }
+}
+
+void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
+  LOG(FATAL) << "Unexpected use of GenDivZero for Arm64";
+}
+
+// Test suspend flag, return target of taken suspend branch
+LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) {
+  // TODO(Arm64): re-enable suspend checks, once art_quick_test_suspend is implemented and
+  //   the suspend register is properly handled in the trampolines.
+#if 0
+  NewLIR3(kA64Subs3rRd, rA64_SUSPEND, rA64_SUSPEND, 1);
+  return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
+#else
+  // TODO(Arm64): Fake suspend check. Will always fail to branch. Remove this.
+  LIR* branch = NewLIR2((target == NULL) ? kA64Cbnz2rt : kA64Cbz2rt, rwzr, 0);
+  branch->target = target;
+  return branch;
+#endif
+}
+
+// Decrement register and branch on condition
+LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
+  // Combine sub & test using sub setflags encoding here
+  OpRegRegImm(kOpSub, reg, reg, 1);  // For value == 1, this should set flags.
+  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
+  return OpCondBranch(c_code, target);
+}
+
+bool Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
+#if ANDROID_SMP != 0
+  // Start by assuming the last LIR is the barrier. If it is not sufficient, we will generate one.
+  LIR* barrier = last_lir_insn_;
+
+  int dmb_flavor;
+  // TODO: revisit Arm barrier kinds
+  switch (barrier_kind) {
+    case kLoadStore: dmb_flavor = kISH; break;
+    case kLoadLoad: dmb_flavor = kISH; break;
+    case kStoreStore: dmb_flavor = kISHST; break;
+    case kStoreLoad: dmb_flavor = kISH; break;
+    default:
+      LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
+      dmb_flavor = kSY;  // quiet gcc.
+      break;
+  }
+
+  bool ret = false;
+
+  // If the same barrier already exists, don't generate another.
+  if (barrier == nullptr
+      || (barrier->opcode != kA64Dmb1B || barrier->operands[0] != dmb_flavor)) {
+    barrier = NewLIR1(kA64Dmb1B, dmb_flavor);
+    ret = true;
+  }
+
+  // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
+  DCHECK(!barrier->flags.use_def_invalid);
+  barrier->u.m.def_mask = ENCODE_ALL;
+  return ret;
+#else
+  return false;
+#endif
+}
+
+void Arm64Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
+  RegLocation rl_result;
+
+  rl_src = LoadValue(rl_src, kCoreReg);
+  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
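+  // Sign-extend the 32-bit source into a 64-bit result: sbfm with immr=0 and imms=31
+  // is the canonical sxtw alias.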
+  NewLIR4(WIDE(kA64Sbfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 31);
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest,
+                                 RegLocation rl_src1, RegLocation rl_src2, bool is_div) {
+  RegLocation rl_result;
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+  GenDivZeroCheck(rl_src2.reg);
+  rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, is_div);
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1,
+                             RegLocation rl_src2) {
+  RegLocation rl_result;
+
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  OpRegRegRegShift(op, rl_result.reg, rl_src1.reg, rl_src2.reg, ENCODE_NO_SHIFT);
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
+  RegLocation rl_result;
+
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  OpRegRegShift(kOpNeg, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
+  RegLocation rl_result;
+
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  OpRegRegShift(kOpMvn, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
+                              RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongOp(kOpMul, rl_dest, rl_src1, rl_src2);
+}
+
+void Arm64Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                              RegLocation rl_src2) {
+  GenLongOp(kOpAdd, rl_dest, rl_src1, rl_src2);
+}
+
+void Arm64Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2) {
+  GenLongOp(kOpSub, rl_dest, rl_src1, rl_src2);
+}
+
+void Arm64Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2) {
+  GenLongOp(kOpAnd, rl_dest, rl_src1, rl_src2);
+}
+
+void Arm64Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                           RegLocation rl_src2) {
+  GenLongOp(kOpOr, rl_dest, rl_src1, rl_src2);
+}
+
+void Arm64Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2) {
+  GenLongOp(kOpXor, rl_dest, rl_src1, rl_src2);
+}
+
+/*
+ * Generate array load
+ */
+void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_dest, int scale) {
+  // TODO(Arm64): check this.
+  UNIMPLEMENTED(WARNING);
+
+  RegisterClass reg_class = RegClassBySize(size);
+  int len_offset = mirror::Array::LengthOffset().Int32Value();
+  int data_offset;
+  RegLocation rl_result;
+  bool constant_index = rl_index.is_const;
+  rl_array = LoadValue(rl_array, kCoreReg);
+  if (!constant_index) {
+    rl_index = LoadValue(rl_index, kCoreReg);
+  }
+
+  if (rl_dest.wide) {
+    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
+  } else {
+    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
+  }
+
+  // If index is constant, just fold it into the data offset
+  if (constant_index) {
+    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
+  }
+
+  /* null object? */
+  GenNullCheck(rl_array.reg, opt_flags);
+
+  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
+  RegStorage reg_len;
+  if (needs_range_check) {
+    reg_len = AllocTemp();
+    /* Get len */
+    Load32Disp(rl_array.reg, len_offset, reg_len);
+    MarkPossibleNullPointerException(opt_flags);
+  } else {
+    ForceImplicitNullCheck(rl_array.reg, opt_flags);
+  }
+  if (rl_dest.wide || rl_dest.fp || constant_index) {
+    RegStorage reg_ptr;
+    if (constant_index) {
+      reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
+    } else {
+      // No special indexed operation, lea + load w/ displacement
+      reg_ptr = AllocTemp();
+      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kA64Lsl, scale));
+      FreeTemp(rl_index.reg);
+    }
+    rl_result = EvalLoc(rl_dest, reg_class, true);
+
+    if (needs_range_check) {
+      if (constant_index) {
+        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
+      } else {
+        GenArrayBoundsCheck(rl_index.reg, reg_len);
+      }
+      FreeTemp(reg_len);
+    }
+    LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size);
+    MarkPossibleNullPointerException(opt_flags);
+    if (!constant_index) {
+      FreeTemp(reg_ptr);
+    }
+    if (rl_dest.wide) {
+      StoreValueWide(rl_dest, rl_result);
+    } else {
+      StoreValue(rl_dest, rl_result);
+    }
+  } else {
+    // Offset base, then use indexed load
+    RegStorage reg_ptr = AllocTemp();
+    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
+    FreeTemp(rl_array.reg);
+    rl_result = EvalLoc(rl_dest, reg_class, true);
+
+    if (needs_range_check) {
+      GenArrayBoundsCheck(rl_index.reg, reg_len);
+      FreeTemp(reg_len);
+    }
+    LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size);
+    MarkPossibleNullPointerException(opt_flags);
+    FreeTemp(reg_ptr);
+    StoreValue(rl_dest, rl_result);
+  }
+}
+
+/*
+ * Generate array store
+ */
+void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
+                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
+  // TODO(Arm64): check this.
+  UNIMPLEMENTED(WARNING);
+
+  RegisterClass reg_class = RegClassBySize(size);
+  int len_offset = mirror::Array::LengthOffset().Int32Value();
+  bool constant_index = rl_index.is_const;
+
+  int data_offset;
+  if (size == k64 || size == kDouble) {
+    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
+  } else {
+    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
+  }
+
+  // If index is constant, just fold it into the data offset.
+  if (constant_index) {
+    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
+  }
+
+  rl_array = LoadValue(rl_array, kCoreReg);
+  if (!constant_index) {
+    rl_index = LoadValue(rl_index, kCoreReg);
+  }
+
+  RegStorage reg_ptr;
+  bool allocated_reg_ptr_temp = false;
+  if (constant_index) {
+    reg_ptr = rl_array.reg;
+  } else if (IsTemp(rl_array.reg) && !card_mark) {
+    Clobber(rl_array.reg);
+    reg_ptr = rl_array.reg;
+  } else {
+    allocated_reg_ptr_temp = true;
+    reg_ptr = AllocTemp();
+  }
+
+  /* null object? */
+  GenNullCheck(rl_array.reg, opt_flags);
+
+  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
+  RegStorage reg_len;
+  if (needs_range_check) {
+    reg_len = AllocTemp();
+    // NOTE: max live temps(4) here.
+    /* Get len */
+    Load32Disp(rl_array.reg, len_offset, reg_len);
+    MarkPossibleNullPointerException(opt_flags);
+  } else {
+    ForceImplicitNullCheck(rl_array.reg, opt_flags);
+  }
+  /* at this point, reg_ptr points to array, 2 live temps */
+  if (rl_src.wide || rl_src.fp || constant_index) {
+    if (rl_src.wide) {
+      rl_src = LoadValueWide(rl_src, reg_class);
+    } else {
+      rl_src = LoadValue(rl_src, reg_class);
+    }
+    if (!constant_index) {
+      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kA64Lsl, scale));
+    }
+    if (needs_range_check) {
+      if (constant_index) {
+        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
+      } else {
+        GenArrayBoundsCheck(rl_index.reg, reg_len);
+      }
+      FreeTemp(reg_len);
+    }
+
+    StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size);
+    MarkPossibleNullPointerException(opt_flags);
+  } else {
+    /* reg_ptr -> array data */
+    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
+    rl_src = LoadValue(rl_src, reg_class);
+    if (needs_range_check) {
+      GenArrayBoundsCheck(rl_index.reg, reg_len);
+      FreeTemp(reg_len);
+    }
+    StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
+    MarkPossibleNullPointerException(opt_flags);
+  }
+  if (allocated_reg_ptr_temp) {
+    FreeTemp(reg_ptr);
+  }
+  if (card_mark) {
+    MarkGCCard(rl_src.reg, rl_array.reg);
+  }
+}
+
+void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
+                                   RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
+  OpKind op = kOpBkpt;
+  // Per spec, we only care about low 6 bits of shift amount.
+  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  if (shift_amount == 0) {
+    StoreValueWide(rl_dest, rl_src);
+    return;
+  }
+
+  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  switch (opcode) {
+    case Instruction::SHL_LONG:
+    case Instruction::SHL_LONG_2ADDR:
+      op = kOpLsl;
+      break;
+    case Instruction::SHR_LONG:
+    case Instruction::SHR_LONG_2ADDR:
+      op = kOpAsr;
+      break;
+    case Instruction::USHR_LONG:
+    case Instruction::USHR_LONG_2ADDR:
+      op = kOpLsr;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected case";
+  }
+  OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                                     RegLocation rl_src1, RegLocation rl_src2) {
+  if ((opcode == Instruction::SUB_LONG) || (opcode == Instruction::SUB_LONG_2ADDR)) {
+    if (!rl_src2.is_const) {
+      return GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+    }
+  } else {
+    // Commutativity: make sure the constant ends up in rl_src2.
+    if (!rl_src2.is_const) {
+      DCHECK(rl_src1.is_const);
+      std::swap(rl_src1, rl_src2);
+    }
+  }
+  DCHECK(rl_src2.is_const);
+
+  OpKind op = kOpBkpt;
+  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+
+  switch (opcode) {
+    case Instruction::ADD_LONG:
+    case Instruction::ADD_LONG_2ADDR:
+      op = kOpAdd;
+      break;
+    case Instruction::SUB_LONG:
+    case Instruction::SUB_LONG_2ADDR:
+      op = kOpSub;
+      break;
+    case Instruction::AND_LONG:
+    case Instruction::AND_LONG_2ADDR:
+      op = kOpAnd;
+      break;
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+      op = kOpOr;
+      break;
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      op = kOpXor;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected opcode";
+  }
+
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  OpRegRegImm(op, rl_result.reg, rl_src1.reg, val);
+  StoreValueWide(rl_dest, rl_result);
+}
+
+/**
+ * @brief Split a register list in pairs or registers.
+ *
+ * Given a list of registers in @p reg_mask, split the list in pairs. Use as follows:
+ * @code
+ *   int reg1 = -1, reg2 = -1;
+ *   while (reg_mask) {
+ *     reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
+ *     if (UNLIKELY(reg2 < 0)) {
+ *       // Single register in reg1.
+ *     } else {
+ *       // Pair in reg1, reg2.
+ *     }
+ *   }
+ * @endcode
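+ *
+ * Note that, as in the example above, @p reg1 must be initialized to -1 before the first
+ * call: the mask returned has been shifted, and the absolute index of the last register
+ * consumed is left in @p reg1, which the following call relies on.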
+ */
+uint32_t Arm64Mir2Lir::GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) {
+  // Find first register.
+  int first_bit_set = __builtin_ctz(reg_mask) + 1;
+  int reg = *reg1 + first_bit_set;
+  reg_mask >>= first_bit_set;
+
+  if (LIKELY(reg_mask)) {
+    // Save the first register, find the second and use the pair opcode.
+    int second_bit_set = __builtin_ctz(reg_mask) + 1;
+    *reg2 = reg;
+    reg_mask >>= second_bit_set;
+    *reg1 = reg + second_bit_set;
+    return reg_mask;
+  }
+
+  // Use the single opcode, as we just have one register.
+  *reg1 = reg;
+  *reg2 = -1;
+  return reg_mask;
+}
+
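+// Reload spilled core registers from [base + offset]. The byte offset is converted to
+// 8-byte slots; registers are restored pair-wise with ldp where possible, with a single
+// ldr for a trailing odd register.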
+void Arm64Mir2Lir::UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask) {
+  int reg1 = -1, reg2 = -1;
+  const int reg_log2_size = 3;
+
+  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
+    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
+    if (UNLIKELY(reg2 < 0)) {
+      NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
+    } else {
+      NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(),
+              RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
+    }
+  }
+}
+
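+// Store core registers to [base + offset], mirroring UnSpillCoreRegs: pairs are spilled
+// with stp, a trailing odd register with a single str.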
+void Arm64Mir2Lir::SpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask) {
+  int reg1 = -1, reg2 = -1;
+  const int reg_log2_size = 3;
+
+  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
+    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
+    if (UNLIKELY(reg2 < 0)) {
+      NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
+    } else {
+      NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
+              RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
+    }
+  }
+}
+
+void Arm64Mir2Lir::UnSpillFPRegs(RegStorage base, int offset, uint32_t reg_mask) {
+  int reg1 = -1, reg2 = -1;
+  const int reg_log2_size = 3;
+
+  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
+    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
+    if (UNLIKELY(reg2 < 0)) {
+      NewLIR3(FWIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
+    } else {
+      NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
+              RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
+    }
+  }
+}
+
+// TODO(Arm64): consider using ld1 and st1?
+void Arm64Mir2Lir::SpillFPRegs(RegStorage base, int offset, uint32_t reg_mask) {
+  int reg1 = -1, reg2 = -1;
+  const int reg_log2_size = 3;
+
+  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
+    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
+    if (UNLIKELY(reg2 < 0)) {
+      NewLIR3(FWIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
+    } else {
+      NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
+              RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
+    }
+  }
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
new file mode 100644
index 0000000..808060d
--- /dev/null
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -0,0 +1,935 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "codegen_arm64.h"
+
+#include <inttypes.h>
+
+#include <string>
+
+#include "dex/compiler_internals.h"
+#include "dex/quick/mir_to_lir-inl.h"
+
+namespace art {
+
+// TODO: rework this when c++11 support allows.
+static const RegStorage core_regs_arr[] =
+    {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7,
+     rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15,
+     rs_w16, rs_w17, rs_w18, rs_w19, rs_w20, rs_w21, rs_w22, rs_w23,
+     rs_w24, rs_w25, rs_w26, rs_w27, rs_w28, rs_w29, rs_w30, rs_w31,
+     rs_wzr};
+static const RegStorage core64_regs_arr[] =
+    {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7,
+     rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15,
+     rs_x16, rs_x17, rs_x18, rs_x19, rs_x20, rs_x21, rs_x22, rs_x23,
+     rs_x24, rs_x25, rs_x26, rs_x27, rs_x28, rs_x29, rs_x30, rs_x31,
+     rs_xzr};
+static const RegStorage sp_regs_arr[] =
+    {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
+     rs_f8, rs_f9, rs_f10, rs_f11, rs_f12, rs_f13, rs_f14, rs_f15,
+     rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23,
+     rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31};
+static const RegStorage dp_regs_arr[] =
+    {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7,
+     rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15,
+     rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23,
+     rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31};
+static const RegStorage reserved_regs_arr[] =
+    {rs_rA32_SUSPEND, rs_rA32_SELF, rs_rA32_SP, rs_rA32_LR, rs_wzr};
+static const RegStorage reserved64_regs_arr[] =
+    {rs_rA64_SUSPEND, rs_rA64_SELF, rs_rA64_SP, rs_rA64_LR, rs_xzr};
+// TUNING: Are there too many temp registers and too few promotion targets?
+// This definition needs to match runtime.cc, the quick entry assembly and the JNI compiler.
+// Note: we cannot call C functions directly if this does not match the C ABI.
+// Currently, rs_rA64_SELF is not a callee-save register, which does not match the C ABI.
+static const RegStorage core_temps_arr[] =
+    {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7,
+     rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15, rs_w16,
+     rs_w17};
+static const RegStorage core64_temps_arr[] =
+    {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7,
+     rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15, rs_x16,
+     rs_x17};
+static const RegStorage sp_temps_arr[] =
+    {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
+     rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23,
+     rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31};
+static const RegStorage dp_temps_arr[] =
+    {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7,
+     rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23,
+     rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31};
+
+static const std::vector<RegStorage> core_regs(core_regs_arr,
+    core_regs_arr + arraysize(core_regs_arr));
+static const std::vector<RegStorage> core64_regs(core64_regs_arr,
+    core64_regs_arr + arraysize(core64_regs_arr));
+static const std::vector<RegStorage> sp_regs(sp_regs_arr,
+    sp_regs_arr + arraysize(sp_regs_arr));
+static const std::vector<RegStorage> dp_regs(dp_regs_arr,
+    dp_regs_arr + arraysize(dp_regs_arr));
+static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
+    reserved_regs_arr + arraysize(reserved_regs_arr));
+static const std::vector<RegStorage> reserved64_regs(reserved64_regs_arr,
+    reserved64_regs_arr + arraysize(reserved64_regs_arr));
+static const std::vector<RegStorage> core_temps(core_temps_arr,
+    core_temps_arr + arraysize(core_temps_arr));
+static const std::vector<RegStorage> core64_temps(core64_temps_arr,
+    core64_temps_arr + arraysize(core64_temps_arr));
+static const std::vector<RegStorage> sp_temps(sp_temps_arr, sp_temps_arr + arraysize(sp_temps_arr));
+static const std::vector<RegStorage> dp_temps(dp_temps_arr, dp_temps_arr + arraysize(dp_temps_arr));
+
+RegLocation Arm64Mir2Lir::LocCReturn() {
+  return arm_loc_c_return;
+}
+
+RegLocation Arm64Mir2Lir::LocCReturnWide() {
+  return arm_loc_c_return_wide;
+}
+
+RegLocation Arm64Mir2Lir::LocCReturnFloat() {
+  return arm_loc_c_return_float;
+}
+
+RegLocation Arm64Mir2Lir::LocCReturnDouble() {
+  return arm_loc_c_return_double;
+}
+
+// Return a target-dependent special register.
+RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) {
+  // TODO(Arm64): this function doesn't work for hard-float ABI.
+  RegStorage res_reg = RegStorage::InvalidReg();
+  switch (reg) {
+    case kSelf: res_reg = rs_rA64_SELF; break;
+    case kSuspend: res_reg = rs_rA64_SUSPEND; break;
+    case kLr: res_reg =  rs_rA64_LR; break;
+    case kPc: res_reg = RegStorage::InvalidReg(); break;
+    case kSp: res_reg =  rs_rA64_SP; break;
+    case kArg0: res_reg = rs_x0; break;
+    case kArg1: res_reg = rs_x1; break;
+    case kArg2: res_reg = rs_x2; break;
+    case kArg3: res_reg = rs_x3; break;
+    case kFArg0: res_reg = rs_f0; break;
+    case kFArg1: res_reg = rs_f1; break;
+    case kFArg2: res_reg = rs_f2; break;
+    case kFArg3: res_reg = rs_f3; break;
+    case kRet0: res_reg = rs_x0; break;
+    case kRet1: res_reg = rs_x0; break;
+    case kInvokeTgt: res_reg = rs_rA64_LR; break;
+    case kHiddenArg: res_reg = rs_x12; break;
+    case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
+    case kCount: res_reg = RegStorage::InvalidReg(); break;
+  }
+  return res_reg;
+}
+
+RegStorage Arm64Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
+  return RegStorage::InvalidReg();
+}
+
+/*
+ * Decode the register id. This routine makes assumptions on the encoding made by RegStorage.
+ */
+uint64_t Arm64Mir2Lir::GetRegMaskCommon(RegStorage reg) {
+  // TODO(Arm64): this function depends too much on the internal RegStorage encoding. Refactor.
+
+  int reg_raw = reg.GetRawBits();
+  // Check for the zero register first.
+  if (UNLIKELY(reg == rs_wzr || reg == rs_xzr)) {
+    // The zero register is not a true register. It is just an immediate zero.
+    return 0;
+  }
+
+  return UINT64_C(1) << (reg_raw & RegStorage::kRegTypeMask);
+}
+
+uint64_t Arm64Mir2Lir::GetPCUseDefEncoding() {
+  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for Arm64";
+  return 0ULL;
+}
+
+// Arm64 specific setup.  TODO: inline?
+void Arm64Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
+  DCHECK_EQ(cu_->instruction_set, kArm64);
+  DCHECK(!lir->flags.use_def_invalid);
+
+  // These flags are somewhat uncommon - bypass if we can.
+  if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LR)) != 0) {
+    if (flags & REG_DEF_SP) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_SP;
+    }
+
+    if (flags & REG_USE_SP) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_SP;
+    }
+
+    if (flags & REG_DEF_LR) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_LR;
+    }
+  }
+}
+
+ArmConditionCode Arm64Mir2Lir::ArmConditionEncoding(ConditionCode ccode) {
+  ArmConditionCode res;
+  switch (ccode) {
+    case kCondEq: res = kArmCondEq; break;
+    case kCondNe: res = kArmCondNe; break;
+    case kCondCs: res = kArmCondCs; break;
+    case kCondCc: res = kArmCondCc; break;
+    case kCondUlt: res = kArmCondCc; break;
+    case kCondUge: res = kArmCondCs; break;
+    case kCondMi: res = kArmCondMi; break;
+    case kCondPl: res = kArmCondPl; break;
+    case kCondVs: res = kArmCondVs; break;
+    case kCondVc: res = kArmCondVc; break;
+    case kCondHi: res = kArmCondHi; break;
+    case kCondLs: res = kArmCondLs; break;
+    case kCondGe: res = kArmCondGe; break;
+    case kCondLt: res = kArmCondLt; break;
+    case kCondGt: res = kArmCondGt; break;
+    case kCondLe: res = kArmCondLe; break;
+    case kCondAl: res = kArmCondAl; break;
+    case kCondNv: res = kArmCondNv; break;
+    default:
+      LOG(FATAL) << "Bad condition code " << ccode;
+      res = static_cast<ArmConditionCode>(0);  // Quiet gcc
+  }
+  return res;
+}
+
+static const char *shift_names[4] = {
+  "lsl",
+  "lsr",
+  "asr",
+  "ror"
+};
+
+static const char* extend_names[8] = {
+  "uxtb",
+  "uxth",
+  "uxtw",
+  "uxtx",
+  "sxtb",
+  "sxth",
+  "sxtw",
+  "sxtx",
+};
+
+/* Decode and print a register extension (e.g. ", uxtb #1") */
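+/* Operand layout (as decoded below): bit 6 clear selects a shift, with the kind in bits 7-8
+   and the amount in bits 0-5; bit 6 set selects an extension, with the kind in bits 3-5 and
+   the amount in bits 0-2. */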
+static void DecodeRegExtendOrShift(int operand, char *buf, size_t buf_size) {
+  if ((operand & (1 << 6)) == 0) {
+    const char *shift_name = shift_names[(operand >> 7) & 0x3];
+    int amount = operand & 0x3f;
+    snprintf(buf, buf_size, ", %s #%d", shift_name, amount);
+  } else {
+    const char *extend_name = extend_names[(operand >> 3) & 0x7];
+    int amount = operand & 0x7;
+    if (amount == 0) {
+      snprintf(buf, buf_size, ", %s", extend_name);
+    } else {
+      snprintf(buf, buf_size, ", %s #%d", extend_name, amount);
+    }
+  }
+}
+
+#define BIT_MASK(w) ((UINT64_C(1) << (w)) - UINT64_C(1))
+
+static uint64_t RotateRight(uint64_t value, unsigned rotate, unsigned width) {
+  DCHECK_LE(width, 64U);
+  rotate &= 63;
+  value = value & BIT_MASK(width);
+  return ((value & BIT_MASK(rotate)) << (width - rotate)) | (value >> rotate);
+}
+
+static uint64_t RepeatBitsAcrossReg(bool is_wide, uint64_t value, unsigned width) {
+  unsigned i;
+  unsigned reg_size = (is_wide) ? 64 : 32;
+  uint64_t result = value & BIT_MASK(width);
+  DCHECK_NE(width, reg_size);
+  for (i = width; i < reg_size; i *= 2) {
+    result |= (result << i);
+  }
+  DCHECK_EQ(i, reg_size);
+  return result;
+}
+
+/**
+ * @brief Decode an immediate in the form required by logical instructions.
+ *
+ * @param is_wide Whether @p value encodes a 64-bit (as opposed to 32-bit) immediate.
+ * @param value The encoded logical immediate to be decoded.
+ * @return The decoded logical immediate.
+ * @note This is the inverse of Arm64Mir2Lir::EncodeLogicalImmediate().
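+ *
+ * For example (illustrative): decoding value 0x03c (N=0, imm_r=0, imm_s=0b111100) with
+ * is_wide == false yields 0x55555555, i.e. the 2-bit pattern 0b01 repeated across 32 bits.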
+ */
+uint64_t Arm64Mir2Lir::DecodeLogicalImmediate(bool is_wide, int value) {
+  unsigned n     = (value >> 12) & 0x01;
+  unsigned imm_r = (value >>  6) & 0x3f;
+  unsigned imm_s = (value >>  0) & 0x3f;
+
+  // An integer is constructed from the n, imm_s and imm_r bits according to
+  // the following table:
+  //
+  // N   imms immr  size S             R
+  // 1 ssssss rrrrrr 64  UInt(ssssss) UInt(rrrrrr)
+  // 0 0sssss xrrrrr 32  UInt(sssss)  UInt(rrrrr)
+  // 0 10ssss xxrrrr 16  UInt(ssss)   UInt(rrrr)
+  // 0 110sss xxxrrr 8   UInt(sss)    UInt(rrr)
+  // 0 1110ss xxxxrr 4   UInt(ss)     UInt(rr)
+  // 0 11110s xxxxxr 2   UInt(s)      UInt(r)
+  // (s bits must not be all set)
+  //
+  // A pattern is constructed of size bits, where the least significant S+1
+  // bits are set. The pattern is rotated right by R, and repeated across a
+  // 32 or 64-bit value, depending on destination register width.
+
+  if (n == 1) {
+    DCHECK_NE(imm_s, 0x3fU);
+    uint64_t bits = BIT_MASK(imm_s + 1);
+    return RotateRight(bits, imm_r, 64);
+  } else {
+    DCHECK_NE((imm_s >> 1), 0x1fU);
+    for (unsigned width = 0x20; width >= 0x2; width >>= 1) {
+      if ((imm_s & width) == 0) {
+        unsigned mask = (unsigned)(width - 1);
+        DCHECK_NE((imm_s & mask), mask);
+        uint64_t bits = BIT_MASK((imm_s & mask) + 1);
+        return RepeatBitsAcrossReg(is_wide, RotateRight(bits, imm_r & mask, width), width);
+      }
+    }
+  }
+  return 0;
+}
+
+/**
+ * @brief Decode an 8-bit single-precision floating point immediate encoded with EncodeImmSingle().
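+ *
+ * For example (illustrative), an encoded value of 0x00 decodes to 2.0f: mantissa 0x10,
+ * positive sign, exponent 4, i.e. 16 * 16 * 0.0078125.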
+ */
+static float DecodeImmSingle(uint8_t small_float) {
+  int mantissa = (small_float & 0x0f) + 0x10;
+  int sign = ((small_float & 0x80) == 0) ? 1 : -1;
+  float signed_mantissa = static_cast<float>(sign*mantissa);
+  int exponent = (((small_float >> 4) & 0x7) + 4) & 0x7;
+  return signed_mantissa*static_cast<float>(1 << exponent)*0.0078125f;
+}
+
+static const char* cc_names[] = {"eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
+                                 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"};
+/*
+ * Interpret a format string and build a string no longer than size
+ * See format key in assemble_arm64.cc.
+ */
+std::string Arm64Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) {
+  std::string buf;
+  const char* fmt_end = &fmt[strlen(fmt)];
+  char tbuf[256];
+  const char* name;
+  char nc;
+  while (fmt < fmt_end) {
+    int operand;
+    if (*fmt == '!') {
+      fmt++;
+      DCHECK_LT(fmt, fmt_end);
+      nc = *fmt++;
+      if (nc == '!') {
+        strcpy(tbuf, "!");
+      } else {
+         DCHECK_LT(fmt, fmt_end);
+         DCHECK_LT(static_cast<unsigned>(nc-'0'), 4U);
+         operand = lir->operands[nc-'0'];
+         switch (*fmt++) {
+           case 'e':  {
+               // Omit ", uxtw #0" in strings like "add w0, w1, w3, uxtw #0" and
+               // ", uxtx #0" in strings like "add x0, x1, x3, uxtx #0"
+               int omittable = ((IS_WIDE(lir->opcode)) ? EncodeExtend(kA64Uxtx, 0) :
+                                EncodeExtend(kA64Uxtw, 0));
+               if (LIKELY(operand == omittable)) {
+                 strcpy(tbuf, "");
+               } else {
+                 DecodeRegExtendOrShift(operand, tbuf, arraysize(tbuf));
+               }
+             }
+             break;
+           case 'o':
+             // Omit ", lsl #0"
+             if (LIKELY(operand == EncodeShift(kA64Lsl, 0))) {
+               strcpy(tbuf, "");
+             } else {
+               DecodeRegExtendOrShift(operand, tbuf, arraysize(tbuf));
+             }
+             break;
+           case 'B':
+             switch (operand) {
+               case kSY:
+                 name = "sy";
+                 break;
+               case kST:
+                 name = "st";
+                 break;
+               case kISH:
+                 name = "ish";
+                 break;
+               case kISHST:
+                 name = "ishst";
+                 break;
+               case kNSH:
+                 name = "nsh";
+                 break;
+               case kNSHST:
+                 name = "shst";
+                 break;
+               default:
+                 name = "DecodeError2";
+                 break;
+             }
+             strcpy(tbuf, name);
+             break;
+           case 's':
+             snprintf(tbuf, arraysize(tbuf), "s%d", operand & RegStorage::kRegNumMask);
+             break;
+           case 'S':
+             snprintf(tbuf, arraysize(tbuf), "d%d", operand & RegStorage::kRegNumMask);
+             break;
+           case 'f':
+             snprintf(tbuf, arraysize(tbuf), "%c%d", (IS_FWIDE(lir->opcode)) ? 'd' : 's',
+                      operand & RegStorage::kRegNumMask);
+             break;
+           case 'l': {
+               bool is_wide = IS_WIDE(lir->opcode);
+               uint64_t imm = DecodeLogicalImmediate(is_wide, operand);
+               snprintf(tbuf, arraysize(tbuf), "%" PRId64 " (%#" PRIx64 ")", imm, imm);
+             }
+             break;
+           case 'I':
+             snprintf(tbuf, arraysize(tbuf), "%f", DecodeImmSingle(operand));
+             break;
+           case 'M':
+             if (LIKELY(operand == 0))
+               strcpy(tbuf, "");
+             else
+               snprintf(tbuf, arraysize(tbuf), ", lsl #%d", 16*operand);
+             break;
+           case 'd':
+             snprintf(tbuf, arraysize(tbuf), "%d", operand);
+             break;
+           case 'w':
+             if (LIKELY(operand != rwzr))
+               snprintf(tbuf, arraysize(tbuf), "w%d", operand & RegStorage::kRegNumMask);
+             else
+               strcpy(tbuf, "wzr");
+             break;
+           case 'W':
+             if (LIKELY(operand != rwsp))
+               snprintf(tbuf, arraysize(tbuf), "w%d", operand & RegStorage::kRegNumMask);
+             else
+               strcpy(tbuf, "wsp");
+             break;
+           case 'x':
+             if (LIKELY(operand != rxzr))
+               snprintf(tbuf, arraysize(tbuf), "x%d", operand & RegStorage::kRegNumMask);
+             else
+               strcpy(tbuf, "xzr");
+             break;
+           case 'X':
+             if (LIKELY(operand != rsp))
+               snprintf(tbuf, arraysize(tbuf), "x%d", operand & RegStorage::kRegNumMask);
+             else
+               strcpy(tbuf, "sp");
+             break;
+           case 'D':
+             snprintf(tbuf, arraysize(tbuf), "%d", operand*((IS_WIDE(lir->opcode)) ? 8 : 4));
+             break;
+           case 'E':
+             snprintf(tbuf, arraysize(tbuf), "%d", operand*4);
+             break;
+           case 'F':
+             snprintf(tbuf, arraysize(tbuf), "%d", operand*2);
+             break;
+           case 'G':
+             if (LIKELY(operand == 0))
+               strcpy(tbuf, "");
+             else
+               strcpy(tbuf, (IS_WIDE(lir->opcode)) ? ", lsl #3" : ", lsl #2");
+             break;
+           case 'c':
+             strcpy(tbuf, cc_names[operand]);
+             break;
+           case 't':
+             snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
+                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + (operand << 2),
+                 lir->target);
+             break;
+           case 'r': {
+               bool is_wide = IS_WIDE(lir->opcode);
+               if (LIKELY(operand != rwzr && operand != rxzr)) {
+                 snprintf(tbuf, arraysize(tbuf), "%c%d", (is_wide) ? 'x' : 'w',
+                          operand & RegStorage::kRegNumMask);
+               } else {
+                 strcpy(tbuf, (is_wide) ? "xzr" : "wzr");
+               }
+             }
+             break;
+           case 'R': {
+               bool is_wide = IS_WIDE(lir->opcode);
+               if (LIKELY(operand != rwsp && operand != rsp)) {
+                 snprintf(tbuf, arraysize(tbuf), "%c%d", (is_wide) ? 'x' : 'w',
+                          operand & RegStorage::kRegNumMask);
+               } else {
+                 strcpy(tbuf, (is_wide) ? "sp" : "wsp");
+               }
+             }
+             break;
+           case 'p':
+             snprintf(tbuf, arraysize(tbuf), ".+%d (addr %#" PRIxPTR ")", 4*operand,
+                      reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4*operand);
+             break;
+           case 'T':
+             if (LIKELY(operand == 0))
+               strcpy(tbuf, "");
+             else if (operand == 1)
+               strcpy(tbuf, ", lsl #12");
+             else
+               strcpy(tbuf, ", DecodeError3");
+             break;
+           default:
+             strcpy(tbuf, "DecodeError1");
+             break;
+        }
+        buf += tbuf;
+      }
+    } else {
+       buf += *fmt++;
+    }
+  }
+  return buf;
+}
+
+void Arm64Mir2Lir::DumpResourceMask(LIR* arm_lir, uint64_t mask, const char* prefix) {
+  char buf[256];
+  buf[0] = 0;
+
+  if (mask == ENCODE_ALL) {
+    strcpy(buf, "all");
+  } else {
+    char num[8];
+    int i;
+
+    for (i = 0; i < kArmRegEnd; i++) {
+      if (mask & (1ULL << i)) {
+        snprintf(num, arraysize(num), "%d ", i);
+        strcat(buf, num);
+      }
+    }
+
+    if (mask & ENCODE_CCODE) {
+      strcat(buf, "cc ");
+    }
+    if (mask & ENCODE_FP_STATUS) {
+      strcat(buf, "fpcc ");
+    }
+
+    /* Memory bits */
+    if (arm_lir && (mask & ENCODE_DALVIK_REG)) {
+      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
+               DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info),
+               DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : "");
+    }
+    if (mask & ENCODE_LITERAL) {
+      strcat(buf, "lit ");
+    }
+
+    if (mask & ENCODE_HEAP_REF) {
+      strcat(buf, "heap ");
+    }
+    if (mask & ENCODE_MUST_NOT_ALIAS) {
+      strcat(buf, "noalias ");
+    }
+  }
+  if (buf[0]) {
+    LOG(INFO) << prefix << ": " << buf;
+  }
+}
+
+bool Arm64Mir2Lir::IsUnconditionalBranch(LIR* lir) {
+  return (lir->opcode == kA64B1t);
+}
+
+bool Arm64Mir2Lir::SupportsVolatileLoadStore(OpSize size) {
+  return true;
+}
+
+RegisterClass Arm64Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
+  if (UNLIKELY(is_volatile)) {
+    // On arm64, fp register load/store is atomic only for single bytes.
+    if (size != kSignedByte && size != kUnsignedByte) {
+      return kCoreReg;
+    }
+  }
+  return RegClassBySize(size);
+}
+
+Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
+    : Mir2Lir(cu, mir_graph, arena) {
+  // Sanity check - make sure encoding map lines up.
+  for (int i = 0; i < kA64Last; i++) {
+    if (UNWIDE(Arm64Mir2Lir::EncodingMap[i].opcode) != i) {
+      LOG(FATAL) << "Encoding order for " << Arm64Mir2Lir::EncodingMap[i].name
+                 << " is wrong: expecting " << i << ", seeing "
+                 << static_cast<int>(Arm64Mir2Lir::EncodingMap[i].opcode);
+    }
+  }
+}
+
+Mir2Lir* Arm64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
+                            ArenaAllocator* const arena) {
+  return new Arm64Mir2Lir(cu, mir_graph, arena);
+}
+
+void Arm64Mir2Lir::CompilerInitializeRegAlloc() {
+  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, core64_regs, sp_regs, dp_regs,
+                                        reserved_regs, reserved64_regs, core_temps, core64_temps,
+                                        sp_temps, dp_temps);
+
+  // Target-specific adjustments.
+  // Alias single precision float registers to corresponding double registers.
+  GrowableArray<RegisterInfo*>::Iterator fp_it(&reg_pool_->sp_regs_);
+  for (RegisterInfo* info = fp_it.Next(); info != nullptr; info = fp_it.Next()) {
+    int fp_reg_num = info->GetReg().GetRegNum();
+    RegStorage dp_reg = RegStorage::FloatSolo64(fp_reg_num);
+    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
+    // Double precision register's master storage should refer to itself.
+    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
+    // Redirect the single-precision register's master storage to the double-precision register.
+    info->SetMaster(dp_reg_info);
+    // Singles should show a single 32-bit mask bit, at first referring to the low half.
+    DCHECK_EQ(info->StorageMask(), 0x1U);
+  }
+
+  // Alias 32bit W registers to corresponding 64bit X registers.
+  GrowableArray<RegisterInfo*>::Iterator w_it(&reg_pool_->core_regs_);
+  for (RegisterInfo* info = w_it.Next(); info != nullptr; info = w_it.Next()) {
+    int x_reg_num = info->GetReg().GetRegNum();
+    RegStorage x_reg = RegStorage::Solo64(x_reg_num);
+    RegisterInfo* x_reg_info = GetRegInfo(x_reg);
+    // 64bit X register's master storage should refer to itself.
+    DCHECK_EQ(x_reg_info, x_reg_info->Master());
+    // Redirect 32bit W master storage to 64bit X.
+    info->SetMaster(x_reg_info);
+    // 32bit W should show a single 32-bit mask bit, at first referring to the low half.
+    DCHECK_EQ(info->StorageMask(), 0x1U);
+  }
+
+  // TODO: re-enable this when we can safely save r4 over the suspension code path.
+  bool no_suspend = NO_SUSPEND;  // || !Runtime::Current()->ExplicitSuspendChecks();
+  if (no_suspend) {
+    GetRegInfo(rs_rA64_SUSPEND)->MarkFree();
+  }
+
+  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
+  // TODO: adjust when we roll to hard float calling convention.
+  reg_pool_->next_core_reg_ = 2;
+  reg_pool_->next_sp_reg_ = 0;
+  reg_pool_->next_dp_reg_ = 0;
+}
+
+/*
+ * TUNING: is true leaf?  Can't just use METHOD_IS_LEAF to determine as some
+ * instructions might call out to C/assembly helper functions.  Until
+ * machinery is in place, always spill lr.
+ */
+
+void Arm64Mir2Lir::AdjustSpillMask() {
+  core_spill_mask_ |= (1 << rs_rA64_LR.GetRegNum());
+  num_core_spills_++;
+}
+
+/*
+ * Mark a callee-save fp register as promoted.
+ */
+void Arm64Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
+  DCHECK(reg.IsFloat());
+  int adjusted_reg_num = reg.GetRegNum() - A64_FP_CALLEE_SAVE_BASE;
+  // Ensure fp_vmap_table is large enough
+  int table_size = fp_vmap_table_.size();
+  for (int i = table_size; i < (adjusted_reg_num + 1); i++) {
+    fp_vmap_table_.push_back(INVALID_VREG);
+  }
+  // Add the current mapping
+  fp_vmap_table_[adjusted_reg_num] = v_reg;
+  // Size of fp_vmap_table is high-water mark, use to set mask
+  num_fp_spills_ = fp_vmap_table_.size();
+  fp_spill_mask_ = ((1 << num_fp_spills_) - 1) << A64_FP_CALLEE_SAVE_BASE;
+}
+
+void Arm64Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
+  DCHECK(reg.IsDouble());
+  MarkPreservedSingle(v_reg, reg);
+}
+
+/* Clobber all regs that might be used by an external C call */
+void Arm64Mir2Lir::ClobberCallerSave() {
+  Clobber(rs_x0);
+  Clobber(rs_x1);
+  Clobber(rs_x2);
+  Clobber(rs_x3);
+  Clobber(rs_x4);
+  Clobber(rs_x5);
+  Clobber(rs_x6);
+  Clobber(rs_x7);
+  Clobber(rs_x8);
+  Clobber(rs_x9);
+  Clobber(rs_x10);
+  Clobber(rs_x11);
+  Clobber(rs_x12);
+  Clobber(rs_x13);
+  Clobber(rs_x14);
+  Clobber(rs_x15);
+  Clobber(rs_x16);
+  Clobber(rs_x17);
+  Clobber(rs_x30);
+
+  Clobber(rs_f0);
+  Clobber(rs_f1);
+  Clobber(rs_f2);
+  Clobber(rs_f3);
+  Clobber(rs_f4);
+  Clobber(rs_f5);
+  Clobber(rs_f6);
+  Clobber(rs_f7);
+  Clobber(rs_f16);
+  Clobber(rs_f17);
+  Clobber(rs_f18);
+  Clobber(rs_f19);
+  Clobber(rs_f20);
+  Clobber(rs_f21);
+  Clobber(rs_f22);
+  Clobber(rs_f23);
+  Clobber(rs_f24);
+  Clobber(rs_f25);
+  Clobber(rs_f26);
+  Clobber(rs_f27);
+  Clobber(rs_f28);
+  Clobber(rs_f29);
+  Clobber(rs_f30);
+  Clobber(rs_f31);
+}
+
+RegLocation Arm64Mir2Lir::GetReturnWideAlt() {
+  RegLocation res = LocCReturnWide();
+  res.reg.SetReg(rx2);
+  res.reg.SetHighReg(rx3);
+  Clobber(rs_x2);
+  Clobber(rs_x3);
+  MarkInUse(rs_x2);
+  MarkInUse(rs_x3);
+  MarkWide(res.reg);
+  return res;
+}
+
+RegLocation Arm64Mir2Lir::GetReturnAlt() {
+  RegLocation res = LocCReturn();
+  res.reg.SetReg(rx1);
+  Clobber(rs_x1);
+  MarkInUse(rs_x1);
+  return res;
+}
+
+/* To be used when explicitly managing register use */
+void Arm64Mir2Lir::LockCallTemps() {
+  LockTemp(rs_x0);
+  LockTemp(rs_x1);
+  LockTemp(rs_x2);
+  LockTemp(rs_x3);
+}
+
+/* To be used when explicitly managing register use */
+void Arm64Mir2Lir::FreeCallTemps() {
+  FreeTemp(rs_x0);
+  FreeTemp(rs_x1);
+  FreeTemp(rs_x2);
+  FreeTemp(rs_x3);
+}
+
+RegStorage Arm64Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
+  UNIMPLEMENTED(FATAL) << "Should not be called.";
+  return RegStorage::InvalidReg();
+}
+
+RegStorage Arm64Mir2Lir::LoadHelper(ThreadOffset<8> offset) {
+  // TODO(Arm64): use LoadWordDisp instead.
+  //   e.g. LoadWordDisp(rs_rA64_SELF, offset.Int32Value(), rs_rA64_LR);
+  LoadBaseDisp(rs_rA64_SELF, offset.Int32Value(), rs_rA64_LR, k64);
+  return rs_rA64_LR;
+}
+
+LIR* Arm64Mir2Lir::CheckSuspendUsingLoad() {
+  RegStorage tmp = rs_x0;
+  LoadWordDisp(rs_rA64_SELF, Thread::ThreadSuspendTriggerOffset<8>().Int32Value(), tmp);
+  LIR* load2 = LoadWordDisp(tmp, 0, tmp);
+  return load2;
+}
+
+uint64_t Arm64Mir2Lir::GetTargetInstFlags(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
+  return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].flags;
+}
+
+const char* Arm64Mir2Lir::GetTargetInstName(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
+  return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].name;
+}
+
+const char* Arm64Mir2Lir::GetTargetInstFmt(int opcode) {
+  DCHECK(!IsPseudoLirOp(opcode));
+  return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].fmt;
+}
+
+// TODO(Arm64): reuse info in QuickArgumentVisitor?
+static RegStorage GetArgPhysicalReg(RegLocation* loc, int* num_gpr_used, int* num_fpr_used,
+                                    OpSize* op_size) {
+  if (loc->fp) {
+    int n = *num_fpr_used;
+    if (n < 8) {
+      *num_fpr_used = n + 1;
+      RegStorage::RegStorageKind reg_kind;
+      if (loc->wide) {
+        *op_size = kDouble;
+        reg_kind = RegStorage::k64BitSolo;
+      } else {
+        *op_size = kSingle;
+        reg_kind = RegStorage::k32BitSolo;
+      }
+      return RegStorage(RegStorage::kValid | reg_kind | RegStorage::kFloatingPoint | n);
+    }
+  } else {
+    int n = *num_gpr_used;
+    if (n < 7) {
+      *num_gpr_used = n + 1;
+      if (loc->wide) {
+        *op_size = k64;
+        return RegStorage::Solo64(n);
+      } else {
+        *op_size = k32;
+        return RegStorage::Solo32(n);
+      }
+    }
+  }
+
+  return RegStorage::InvalidReg();
+}
+
+/*
+ * If there are any ins passed in registers that have not been promoted
+ * to a callee-save register, flush them to the frame.  Perform initial
+ * assignment of promoted arguments.
+ *
+ * ArgLocs is an array of location records describing the incoming arguments
+ * with one location record per word of argument.
+ */
+void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
+  int num_gpr_used = 1;
+  int num_fpr_used = 0;
+
+  /*
+   * Dummy up a RegLocation for the incoming Method*
+   * It will attempt to keep kArg0 live (or copy it to home location
+   * if promoted).
+   */
+  RegLocation rl_src = rl_method;
+  rl_src.location = kLocPhysReg;
+  rl_src.reg = TargetReg(kArg0);
+  rl_src.home = false;
+  MarkLive(rl_src);
+
+  // rl_method might be 32-bit, but ArtMethod* on stack is 64-bit, so always flush it.
+  StoreWordDisp(TargetReg(kSp), 0, TargetReg(kArg0));
+
+  // If Method* has been promoted, load it,
+  // otherwise, rl_method is the 32-bit value on [sp], and has already been loaded.
+  if (rl_method.location == kLocPhysReg) {
+    StoreValue(rl_method, rl_src);
+  }
+
+  if (cu_->num_ins == 0) {
+    return;
+  }
+
+  int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
+  for (int i = 0; i < cu_->num_ins; i++) {
+    PromotionMap* v_map = &promotion_map_[start_vreg + i];
+    RegLocation* t_loc = &ArgLocs[i];
+    OpSize op_size;
+    RegStorage reg = GetArgPhysicalReg(t_loc, &num_gpr_used, &num_fpr_used, &op_size);
+
+    if (reg.Valid()) {
+      if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
+        OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
+      } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
+        OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg);
+      } else {
+        StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, op_size);
+        if (reg.Is64Bit()) {
+          if (SRegOffset(start_vreg + i) + 4 != SRegOffset(start_vreg + i + 1)) {
+            LOG(FATAL) << "64-bit value stored in non-consecutive 4-byte slots";
+          }
+          i += 1;
+        }
+      }
+    } else {
+      // If arriving in frame & promoted
+      if (v_map->core_location == kLocPhysReg) {
+        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i),
+                     RegStorage::Solo32(v_map->core_reg));
+      }
+      if (v_map->fp_location == kLocPhysReg) {
+        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg));
+      }
+    }
+  }
+}
+
+int Arm64Mir2Lir::LoadArgRegs(CallInfo* info, int call_state,
+                              NextCallInsn next_call_insn,
+                              const MethodReference& target_method,
+                              uint32_t vtable_idx, uintptr_t direct_code,
+                              uintptr_t direct_method, InvokeType type, bool skip_this) {
+  int last_arg_reg = TargetReg(kArg3).GetReg();
+  int next_reg = TargetReg(kArg1).GetReg();
+  int next_arg = 0;
+  if (skip_this) {
+    next_reg++;
+    next_arg++;
+  }
+  for (; (next_reg <= last_arg_reg) && (next_arg < info->num_arg_words); next_reg++) {
+    RegLocation rl_arg = info->args[next_arg++];
+    rl_arg = UpdateRawLoc(rl_arg);
+    if (rl_arg.wide && (next_reg <= TargetReg(kArg2).GetReg())) {
+      RegStorage r_tmp(RegStorage::k64BitPair, next_reg, next_reg + 1);
+      LoadValueDirectWideFixed(rl_arg, r_tmp);
+      next_reg++;
+      next_arg++;
+    } else {
+      if (rl_arg.wide) {
+        rl_arg = NarrowRegLoc(rl_arg);
+        rl_arg.is_const = false;
+      }
+      LoadValueDirectFixed(rl_arg, RegStorage::Solo32(next_reg));
+    }
+    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                                direct_code, direct_method, type);
+  }
+  return call_state;
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
new file mode 100644
index 0000000..eca0d2f
--- /dev/null
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -0,0 +1,1031 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm64_lir.h"
+#include "codegen_arm64.h"
+#include "dex/quick/mir_to_lir-inl.h"
+
+namespace art {
+
+/* This file contains codegen for the A64 ISA. */
+
+static int32_t EncodeImmSingle(uint32_t bits) {
+  /*
+   * Valid values will have the form:
+   *
+   *   aBbb.bbbc.defg.h000.0000.0000.0000.0000
+   *
+   * where B = not(b). In other words, if b == 1, then B == 0 and vice versa.
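+   *
+   * For example (illustrative): 1.0f has the bit pattern 0x3f800000, which matches this
+   * form (a = 0, b = 1, cdefgh = 110000) and encodes to imm8 = 0x70.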
+   */
+
+  // bits[18..0] are cleared.
+  if ((bits & 0x0007ffff) != 0)
+    return -1;
+
+  // bits[29..25] are all set or all cleared.
+  uint32_t b_pattern = (bits >> 16) & 0x3e00;
+  if (b_pattern != 0 && b_pattern != 0x3e00)
+    return -1;
+
+  // bit[30] and bit[29] are opposite.
+  if (((bits ^ (bits << 1)) & 0x40000000) == 0)
+    return -1;
+
+  // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
+  // bit7: a000.0000
+  uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
+  // bit6: 0b00.0000
+  uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
+  // bit5_to_0: 00cd.efgh
+  uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
+  return (bit7 | bit6 | bit5_to_0);
+}
+
+static int32_t EncodeImmDouble(uint64_t bits) {
+  /*
+   * Valid values will have the form:
+   *
+   *   aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+   *   0000.0000.0000.0000.0000.0000.0000.0000
+   *
+   * where B = not(b).
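+   *
+   * For example (illustrative): 1.0 has the bit pattern 0x3ff0000000000000 and encodes to
+   * imm8 = 0x70, matching the single-precision encoding of 1.0f.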
+   */
+
+  // bits[47..0] are cleared.
+  if ((bits & UINT64_C(0xffffffffffff)) != 0)
+    return -1;
+
+  // bits[61..54] are all set or all cleared.
+  uint32_t b_pattern = (bits >> 48) & 0x3fc0;
+  if (b_pattern != 0 && b_pattern != 0x3fc0)
+    return -1;
+
+  // bit[62] and bit[61] are opposite.
+  if (((bits ^ (bits << 1)) & UINT64_C(0x4000000000000000)) == 0)
+    return -1;
+
+  // bit7: a000.0000
+  uint32_t bit7 = ((bits >> 63) & 0x1) << 7;
+  // bit6: 0b00.0000
+  uint32_t bit6 = ((bits >> 61) & 0x1) << 6;
+  // bit5_to_0: 00cd.efgh
+  uint32_t bit5_to_0 = (bits >> 48) & 0x3f;
+  return (bit7 | bit6 | bit5_to_0);
+}
+
+LIR* Arm64Mir2Lir::LoadFPConstantValue(int r_dest, int32_t value) {
+  DCHECK(RegStorage::IsSingle(r_dest));
+  if (value == 0) {
+    return NewLIR2(kA64Fmov2sw, r_dest, rwzr);
+  } else {
+    int32_t encoded_imm = EncodeImmSingle((uint32_t)value);
+    if (encoded_imm >= 0) {
+      return NewLIR2(kA64Fmov2fI, r_dest, encoded_imm);
+    }
+  }
+
+  LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
+  if (data_target == NULL) {
+    data_target = AddWordData(&literal_list_, value);
+  }
+
+  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp,
+                            r_dest, 0, 0, 0, 0, data_target);
+  SetMemRefType(load_pc_rel, true, kLiteral);
+  AppendLIR(load_pc_rel);
+  return load_pc_rel;
+}
+
+LIR* Arm64Mir2Lir::LoadFPConstantValueWide(int r_dest, int64_t value) {
+  DCHECK(RegStorage::IsDouble(r_dest));
+  if (value == 0) {
+    return NewLIR2(kA64Fmov2Sx, r_dest, rxzr);
+  } else {
+    int32_t encoded_imm = EncodeImmDouble(value);
+    if (encoded_imm >= 0) {
+      return NewLIR2(FWIDE(kA64Fmov2fI), r_dest, encoded_imm);
+    }
+  }
+
+  // No short form - load from the literal pool.
+  int32_t val_lo = Low32Bits(value);
+  int32_t val_hi = High32Bits(value);
+  LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
+  if (data_target == NULL) {
+    data_target = AddWideData(&literal_list_, val_lo, val_hi);
+  }
+
+  DCHECK(RegStorage::IsFloat(r_dest));
+  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, FWIDE(kA64Ldr2fp),
+                            r_dest, 0, 0, 0, 0, data_target);
+  SetMemRefType(load_pc_rel, true, kLiteral);
+  AppendLIR(load_pc_rel);
+  return load_pc_rel;
+}
+
+static int CountLeadingZeros(bool is_wide, uint64_t value) {
+  return (is_wide) ? __builtin_clzll(value) : __builtin_clz((uint32_t)value);
+}
+
+static int CountTrailingZeros(bool is_wide, uint64_t value) {
+  return (is_wide) ? __builtin_ctzll(value) : __builtin_ctz((uint32_t)value);
+}
+
+static int CountSetBits(bool is_wide, uint64_t value) {
+  return ((is_wide) ?
+          __builtin_popcountll(value) : __builtin_popcount((uint32_t)value));
+}
+
+/**
+ * @brief Try encoding an immediate in the form required by logical instructions.
+ *
+ * @param is_wide Whether @p value is a 64-bit (as opposed to 32-bit) value.
+ * @param value An integer to be encoded. This is interpreted as 64-bit if @p is_wide is true and as
+ *   32-bit if @p is_wide is false.
+ * @return A non-negative integer containing the encoded immediate or -1 if the encoding failed.
+ * @note This is the inverse of Arm64Mir2Lir::DecodeLogicalImmediate().
+ */
+int Arm64Mir2Lir::EncodeLogicalImmediate(bool is_wide, uint64_t value) {
+  unsigned n, imm_s, imm_r;
+
+  // Logical immediates are encoded using parameters n, imm_s and imm_r using
+  // the following table:
+  //
+  //  N   imms    immr    size        S             R
+  //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
+  //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
+  //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
+  //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
+  //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
+  //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
+  // (s bits must not be all set)
+  //
+  // A pattern is constructed of size bits, where the least significant S+1
+  // bits are set. The pattern is rotated right by R, and repeated across a
+  // 32 or 64-bit value, depending on destination register width.
+  //
+  // To test if an arbitrary immediate can be encoded using this scheme, an
+  // iterative algorithm is used.
+  //
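+  //
+  // Worked example (illustrative): the 32-bit value 0x0f0f0f0f repeats an 8-bit element
+  // whose four least significant bits are set, with no rotation, so it is encoded as
+  // N = 0, imm_r = 0, imm_s = 0b110011 and this function returns 0x33.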
+
+  // 1. If the value has all set or all clear bits, it can't be encoded.
+  if (value == 0 || value == ~UINT64_C(0) ||
+      (!is_wide && (uint32_t)value == ~UINT32_C(0))) {
+    return -1;
+  }
+
+  unsigned lead_zero  = CountLeadingZeros(is_wide, value);
+  unsigned lead_one   = CountLeadingZeros(is_wide, ~value);
+  unsigned trail_zero = CountTrailingZeros(is_wide, value);
+  unsigned trail_one  = CountTrailingZeros(is_wide, ~value);
+  unsigned set_bits   = CountSetBits(is_wide, value);
+
+  // The fixed bits in the immediate s field.
+  // If width == 64 (X reg), start at 0xFFFFFF80.
+  // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
+  // widths won't be executed.
+  unsigned width = (is_wide) ? 64 : 32;
+  int imm_s_fixed = (is_wide) ? -128 : -64;
+  int imm_s_mask = 0x3f;
+
+  for (;;) {
+    // 2. If the value is two bits wide, it can be encoded.
+    if (width == 2) {
+      n = 0;
+      imm_s = 0x3C;
+      imm_r = (value & 3) - 1;
+      break;
+    }
+
+    n = (width == 64) ? 1 : 0;
+    imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
+    if ((lead_zero + set_bits) == width) {
+      imm_r = 0;
+    } else {
+      imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
+    }
+
+    // 3. If the sum of leading zeros, trailing zeros and set bits is
+    //    equal to the bit width of the value, it can be encoded.
+    if (lead_zero + trail_zero + set_bits == width) {
+      break;
+    }
+
+    // 4. If the sum of leading ones, trailing ones and unset bits in the
+    //    value is equal to the bit width of the value, it can be encoded.
+    if (lead_one + trail_one + (width - set_bits) == width) {
+      break;
+    }
+
+    // 5. If the most-significant half of the bitwise value is equal to
+    //    the least-significant half, return to step 2 using the
+    //    least-significant half of the value.
+    uint64_t mask = (UINT64_C(1) << (width >> 1)) - 1;
+    if ((value & mask) == ((value >> (width >> 1)) & mask)) {
+      width >>= 1;
+      set_bits >>= 1;
+      imm_s_fixed >>= 1;
+      continue;
+    }
+
+    // 6. Otherwise, the value can't be encoded.
+    return -1;
+  }
+
+  return (n << 12 | imm_r << 6 | imm_s);
+}
+
+bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value) {
+  return false;  // (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
+}
+
+bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) {
+  return EncodeImmSingle(value) >= 0;
+}
+
+bool Arm64Mir2Lir::InexpensiveConstantLong(int64_t value) {
+  return InexpensiveConstantInt(High32Bits(value)) && InexpensiveConstantInt(Low32Bits(value));
+}
+
+bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) {
+  return EncodeImmDouble(value) >= 0;
+}
+
+/*
+ * Load an immediate using a single instruction when possible; otherwise
+ * use a pair of movz and movk instructions.
+ *
+ * No additional register clobbering operation is performed. Use this version when
+ * 1) r_dest is freshly returned from AllocTemp, or
+ * 2) the codegen is under fixed register usage.
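+ *
+ * Illustrative cases: 0x0000ffff needs a single movz, 0xffff1234 a single movn, while
+ * 0x12345678 is neither "fast" nor a valid logical immediate and falls back to a
+ * movz/movk pair.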
+ */
+LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
+  LIR* res;
+
+  if (r_dest.IsFloat()) {
+    return LoadFPConstantValue(r_dest.GetReg(), value);
+  }
+
+  // Loading SP/ZR with an immediate is not supported.
+  DCHECK_NE(r_dest.GetReg(), rwsp);
+  DCHECK_NE(r_dest.GetReg(), rwzr);
+
+  // Compute how many movk, movz instructions are needed to load the value.
+  uint16_t high_bits = High16Bits(value);
+  uint16_t low_bits = Low16Bits(value);
+
+  bool low_fast = ((uint16_t)(low_bits + 1) <= 1);
+  bool high_fast = ((uint16_t)(high_bits + 1) <= 1);
+
+  if (LIKELY(low_fast || high_fast)) {
+    // 1 instruction is enough to load the immediate.
+    if (LIKELY(low_bits == high_bits)) {
+      // Value is either 0 or -1: we can just use wzr.
+      ArmOpcode opcode = LIKELY(low_bits == 0) ? kA64Mov2rr : kA64Mvn2rr;
+      res = NewLIR2(opcode, r_dest.GetReg(), rwzr);
+    } else {
+      uint16_t uniform_bits, useful_bits;
+      int shift;
+
+      if (LIKELY(high_fast)) {
+        shift = 0;
+        uniform_bits = high_bits;
+        useful_bits = low_bits;
+      } else {
+        shift = 1;
+        uniform_bits = low_bits;
+        useful_bits = high_bits;
+      }
+
+      if (UNLIKELY(uniform_bits != 0)) {
+        res = NewLIR3(kA64Movn3rdM, r_dest.GetReg(), ~useful_bits, shift);
+      } else {
+        res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), useful_bits, shift);
+      }
+    }
+  } else {
+    // movk, movz require 2 instructions. Try detecting logical immediates.
+    int log_imm = EncodeLogicalImmediate(/*is_wide=*/false, value);
+    if (log_imm >= 0) {
+      res = NewLIR3(kA64Orr3Rrl, r_dest.GetReg(), rwzr, log_imm);
+    } else {
+      // Use 2 instructions.
+      res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), low_bits, 0);
+      NewLIR3(kA64Movk3rdM, r_dest.GetReg(), high_bits, 1);
+    }
+  }
+
+  return res;
+}
+
+LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) {
+  LIR* res = NewLIR1(kA64B1t, 0 /* offset to be patched  during assembly */);
+  res->target = target;
+  return res;
+}
+
+LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
+  LIR* branch = NewLIR2(kA64B2ct, ArmConditionEncoding(cc),
+                        0 /* offset to be patched */);
+  branch->target = target;
+  return branch;
+}
+
+LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
+  ArmOpcode opcode = kA64Brk1d;
+  switch (op) {
+    case kOpBlx:
+      opcode = kA64Blr1x;
+      break;
+    // TODO(Arm64): port kThumbBx.
+    // case kOpBx:
+    //   opcode = kThumbBx;
+    //   break;
+    default:
+      LOG(FATAL) << "Bad opcode " << op;
+  }
+  return NewLIR1(opcode, r_dest_src.GetReg());
+}
+
+LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift) {
+  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
+  CHECK_EQ(r_dest_src1.Is64Bit(), r_src2.Is64Bit());
+  ArmOpcode opcode = kA64Brk1d;
+
+  switch (op) {
+    case kOpCmn:
+      opcode = kA64Cmn3rro;
+      break;
+    case kOpCmp:
+      opcode = kA64Cmp3rro;
+      break;
+    case kOpMov:
+      opcode = kA64Mov2rr;
+      break;
+    case kOpMvn:
+      opcode = kA64Mvn2rr;
+      break;
+    case kOpNeg:
+      opcode = kA64Neg3rro;
+      break;
+    case kOpTst:
+      opcode = kA64Tst3rro;
+      break;
+    case kOpRev:
+      DCHECK_EQ(shift, 0);
+      // Binary, but rm is encoded twice.
+      return NewLIR3(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
+      break;
+    case kOpRevsh:
+      // Binary, but rm is encoded twice.
+      return NewLIR3(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
+      break;
+    case kOp2Byte:
+      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+      // "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
+      // For now we use sbfm directly.
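+      // For example, sign-extending a byte is "sbfx w1, w2, #0, #8", i.e. "sbfm w1, w2, #0, #7".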
+      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 7);
+    case kOp2Short:
+      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+      // For now we use sbfm rather than its alias, sbfx.
+      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
+    case kOp2Char:
+      // "ubfx r1, r2, #imm1, #imm2" is "ubfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
+      // For now we use ubfm directly.
+      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
+    default:
+      return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift);
+  }
+
+  DCHECK(!IsPseudoLirOp(opcode));
+  if (EncodingMap[opcode].flags & IS_BINARY_OP) {
+    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+    return NewLIR2(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg());
+  } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
+    ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
+    if (kind == kFmtShift) {
+      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
+    }
+  }
+
+  LOG(FATAL) << "Unexpected encoding operand count";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
+  return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
+}
+
+LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
+LIR* Arm64Mir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
+LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
+  LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm64";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
+                                    RegStorage r_src2, int shift) {
+  ArmOpcode opcode = kA64Brk1d;
+
+  switch (op) {
+    case kOpAdd:
+      opcode = kA64Add4rrro;
+      break;
+    case kOpSub:
+      opcode = kA64Sub4rrro;
+      break;
+    // case kOpRsub:
+    //   opcode = kA64RsubWWW;
+    //   break;
+    case kOpAdc:
+      opcode = kA64Adc3rrr;
+      break;
+    case kOpAnd:
+      opcode = kA64And4rrro;
+      break;
+    case kOpXor:
+      opcode = kA64Eor4rrro;
+      break;
+    case kOpMul:
+      opcode = kA64Mul3rrr;
+      break;
+    case kOpDiv:
+      opcode = kA64Sdiv3rrr;
+      break;
+    case kOpOr:
+      opcode = kA64Orr4rrro;
+      break;
+    case kOpSbc:
+      opcode = kA64Sbc3rrr;
+      break;
+    case kOpLsl:
+      opcode = kA64Lsl3rrr;
+      break;
+    case kOpLsr:
+      opcode = kA64Lsr3rrr;
+      break;
+    case kOpAsr:
+      opcode = kA64Asr3rrr;
+      break;
+    case kOpRor:
+      opcode = kA64Ror3rrr;
+      break;
+    default:
+      LOG(FATAL) << "Bad opcode: " << op;
+      break;
+  }
+
+  // The instructions above belong to two kinds:
+  // - 4-operands instructions, where the last operand is a shift/extend immediate,
+  // - 3-operands instructions with no shift/extend.
+  ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
+  CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit());
+  CHECK_EQ(r_dest.Is64Bit(), r_src2.Is64Bit());
+  if (EncodingMap[opcode].flags & IS_QUAD_OP) {
+    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+    return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
+  } else {
+    DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
+    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+    return NewLIR3(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
+  }
+}
+
+LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
+  return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT);
+}
+
+// Should be taking an int64_t value ?
+LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
+  LIR* res;
+  bool neg = (value < 0);
+  int64_t abs_value = (neg) ? -value : value;
+  ArmOpcode opcode = kA64Brk1d;
+  ArmOpcode alt_opcode = kA64Brk1d;
+  int32_t log_imm = -1;
+  bool is_wide = r_dest.Is64Bit();
+  CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit());
+  ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
+
+  switch (op) {
+    case kOpLsl: {
+      // "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
+      // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 32), #(31-imm)".
+      // For now, we just use ubfm directly.
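+      // For example, "lsl w1, w2, #3" is emitted as "ubfm w1, w2, #29, #28".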
+      int max_value = (is_wide) ? 64 : 32;
+      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest.GetReg(), r_src1.GetReg(),
+                     (-value) & (max_value - 1), max_value - value - 1);
+    }
+    case kOpLsr:
+      return NewLIR3(kA64Lsr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
+    case kOpAsr:
+      return NewLIR3(kA64Asr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
+    case kOpRor:
+      // "ror r1, r2, #imm" is an alias of "extr r1, r2, r2, #imm".
+      // For now, we just use extr directly.
+      return NewLIR4(kA64Extr4rrrd | wide, r_dest.GetReg(), r_src1.GetReg(), r_src1.GetReg(),
+                     value);
+    case kOpAdd:
+      neg = !neg;
+      // Note: intentional fallthrough
+    case kOpSub:
+      // Add and sub below read/write sp rather than xzr.
+      if (abs_value < 0x1000) {
+        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
+        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value, 0);
+      } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
+        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
+        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
+      } else {
+        log_imm = -1;
+        alt_opcode = (neg) ? kA64Add4rrro : kA64Sub4rrro;
+      }
+      break;
+    // case kOpRsub:
+    //   opcode = kThumb2RsubRRI8M;
+    //   alt_opcode = kThumb2RsubRRR;
+    //   break;
+    case kOpAdc:
+      log_imm = -1;
+      alt_opcode = kA64Adc3rrr;
+      break;
+    case kOpSbc:
+      log_imm = -1;
+      alt_opcode = kA64Sbc3rrr;
+      break;
+    case kOpOr:
+      log_imm = EncodeLogicalImmediate(is_wide, value);
+      opcode = kA64Orr3Rrl;
+      alt_opcode = kA64Orr4rrro;
+      break;
+    case kOpAnd:
+      log_imm = EncodeLogicalImmediate(is_wide, value);
+      opcode = kA64And3Rrl;
+      alt_opcode = kA64And4rrro;
+      break;
+    case kOpXor:
+      log_imm = EncodeLogicalImmediate(is_wide, value);
+      opcode = kA64Eor3Rrl;
+      alt_opcode = kA64Eor4rrro;
+      break;
+    case kOpMul:
+      // TUNING: power of 2, shift & add
+      log_imm = -1;
+      alt_opcode = kA64Mul3rrr;
+      break;
+    default:
+      LOG(FATAL) << "Bad opcode: " << op;
+  }
+
+  if (log_imm >= 0) {
+    return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
+  } else {
+    RegStorage r_scratch = AllocTemp();
+    LoadConstant(r_scratch, value);
+    if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
+      res = NewLIR4(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0);
+    else
+      res = NewLIR3(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
+    FreeTemp(r_scratch);
+    return res;
+  }
+}
+
+LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
+  return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value));
+}
+
+LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) {
+  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
+  ArmOpcode opcode = kA64Brk1d;
+  ArmOpcode neg_opcode = kA64Brk1d;
+  bool shift;
+  bool neg = (value < 0);
+  uint64_t abs_value = (neg) ? -value : value;
+
+  if (LIKELY(abs_value < 0x1000)) {
+    // abs_value is a 12-bit immediate.
+    shift = false;
+  } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
+    // abs_value is a shifted 12-bit immediate.
+    shift = true;
+    abs_value >>= 12;
+  } else {
+    RegStorage r_tmp = AllocTemp();
+    LIR* res = LoadConstant(r_tmp, value);
+    OpRegReg(op, r_dest_src1, r_tmp);
+    FreeTemp(r_tmp);
+    return res;
+  }
+
+  switch (op) {
+    case kOpAdd:
+      neg_opcode = kA64Sub4RRdT;
+      opcode = kA64Add4RRdT;
+      break;
+    case kOpSub:
+      neg_opcode = kA64Add4RRdT;
+      opcode = kA64Sub4RRdT;
+      break;
+    case kOpCmp:
+      neg_opcode = kA64Cmn3RdT;
+      opcode = kA64Cmp3RdT;
+      break;
+    default:
+      LOG(FATAL) << "Bad op-kind in OpRegImm: " << op;
+      break;
+  }
+
+  if (UNLIKELY(neg))
+    opcode = neg_opcode;
+
+  if (EncodingMap[opcode].flags & IS_QUAD_OP)
+    return NewLIR4(opcode | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), abs_value,
+                   (shift) ? 1 : 0);
+  else
+    return NewLIR3(opcode | wide, r_dest_src1.GetReg(), abs_value, (shift) ? 1 : 0);
+}
+
+LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
+  if (r_dest.IsFloat()) {
+    return LoadFPConstantValueWide(r_dest.GetReg(), value);
+  } else {
+    // TODO(Arm64): check whether we can load the immediate with a short form.
+    //   e.g. via movz, movk or via logical immediate.
+
+    // No short form - load from the literal pool.
+    int32_t val_lo = Low32Bits(value);
+    int32_t val_hi = High32Bits(value);
+    LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
+    if (data_target == NULL) {
+      data_target = AddWideData(&literal_list_, val_lo, val_hi);
+    }
+
+    LIR* res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp),
+                      r_dest.GetReg(), 0, 0, 0, 0, data_target);
+    SetMemRefType(res, true, kLiteral);
+    AppendLIR(res);
+    return res;
+  }
+}
+
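+// Note on the packed shift/extend operand produced by the helpers below: shifts keep the
+// shift type in bits [8:7] and the amount in bits [4:0] with bit 6 clear, while extends set
+// bit 6 and keep the extend type in bits [5:3] and the amount in bits [2:0].
+// IsExtendEncoding() simply tests bit 6.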
+int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) {
+  return ((shift_type & 0x3) << 7) | (amount & 0x1f);
+}
+
+int Arm64Mir2Lir::EncodeExtend(int extend_type, int amount) {
+  return  (1 << 6) | ((extend_type & 0x7) << 3) | (amount & 0x7);
+}
+
+bool Arm64Mir2Lir::IsExtendEncoding(int encoded_value) {
+  return ((1 << 6) & encoded_value) != 0;
+}
+
+LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
+                                   int scale, OpSize size) {
+  LIR* load;
+  ArmOpcode opcode = kA64Brk1d;
+  ArmOpcode wide = kA64NotWide;
+
+  DCHECK(scale == 0 || scale == 1);
+
+  if (r_dest.IsFloat()) {
+    bool is_double = r_dest.IsDouble();
+    bool is_single = !is_double;
+    DCHECK_EQ(is_single, r_dest.IsSingle());
+
+    // If r_dest is a single, then size must be either k32 or kSingle.
+    // If r_dest is a double, then size must be either k64 or kDouble.
+    DCHECK(!is_single || size == k32 || size == kSingle);
+    DCHECK(!is_double || size == k64 || size == kDouble);
+    return NewLIR4((is_double) ? FWIDE(kA64Ldr4fXxG) : kA64Ldr4fXxG,
+                   r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
+  }
+
+  switch (size) {
+    case kDouble:
+    case kWord:
+    case k64:
+      wide = kA64Wide;
+      // Intentional fall-through.
+    case kSingle:
+    case k32:
+    case kReference:
+      opcode = kA64Ldr4rXxG;
+      break;
+    case kUnsignedHalf:
+      opcode = kA64Ldrh4wXxd;
+      break;
+    case kSignedHalf:
+      opcode = kA64Ldrsh4rXxd;
+      break;
+    case kUnsignedByte:
+      opcode = kA64Ldrb3wXx;
+      break;
+    case kSignedByte:
+      opcode = kA64Ldrsb3rXx;
+      break;
+    default:
+      LOG(FATAL) << "Bad size: " << size;
+  }
+
+  if (UNLIKELY((EncodingMap[opcode].flags & IS_TERTIARY_OP) != 0)) {
+    // Tertiary ops (e.g. ldrb, ldrsb) do not support scale.
+    DCHECK_EQ(scale, 0);
+    load = NewLIR3(opcode | wide, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
+  } else {
+    DCHECK(scale == 0 || scale == ((wide == kA64Wide) ? 3 : 2));
+    load = NewLIR4(opcode | wide, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
+                   (scale != 0) ? 1 : 0);
+  }
+
+  return load;
+}
+
+LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
+                                    int scale, OpSize size) {
+  LIR* store;
+  ArmOpcode opcode = kA64Brk1d;
+  ArmOpcode wide = kA64NotWide;
+
+  DCHECK(scale == 0 || scale == 1);
+
+  if (r_src.IsFloat()) {
+    bool is_double = r_src.IsDouble();
+    bool is_single = !is_double;
+    DCHECK_EQ(is_single, r_src.IsSingle());
+
+    // If r_src is a single, then size must be either k32 or kSingle.
+    // If r_src is a double, then size must be either k64 or kDouble.
+    DCHECK(!is_single || size == k32 || size == kSingle);
+    DCHECK(!is_double || size == k64 || size == kDouble);
+    return NewLIR4((is_double) ? FWIDE(kA64Str4fXxG) : kA64Str4fXxG,
+                   r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
+  }
+
+  switch (size) {
+    case kDouble:     // Intentional fall-through.
+    case kWord:       // Intentional fall-through.
+    case k64:
+      opcode = kA64Str4rXxG;
+      wide = kA64Wide;
+      break;
+    case kSingle:     // Intentional fall-through.
+    case k32:         // Intentional fall-through.
+    case kReference:
+      opcode = kA64Str4rXxG;
+      break;
+    case kUnsignedHalf:
+    case kSignedHalf:
+      opcode = kA64Strh4wXxd;
+      break;
+    case kUnsignedByte:
+    case kSignedByte:
+      opcode = kA64Strb3wXx;
+      break;
+    default:
+      LOG(FATAL) << "Bad size: " << size;
+  }
+
+  if (UNLIKELY((EncodingMap[opcode].flags & IS_TERTIARY_OP) != 0)) {
+    // Tertiary ops (e.g. strb) do not support scale.
+    DCHECK_EQ(scale, 0);
+    store = NewLIR3(opcode | wide, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
+  } else {
+    store = NewLIR4(opcode | wide, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
+  }
+
+  return store;
+}
+
+/*
+ * Load value from base + displacement.  Optionally perform null check
+ * on base (which must have an associated s_reg and MIR).  If not
+ * performing null check, incoming MIR can be null.
+ */
+LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
+                                    OpSize size) {
+  LIR* load = NULL;
+  ArmOpcode opcode = kA64Brk1d;
+  ArmOpcode alt_opcode = kA64Brk1d;
+  int scale = 0;
+
+  switch (size) {
+    case kDouble:     // Intentional fall-through.
+    case kWord:       // Intentional fall-through.
+    case k64:
+      scale = 3;
+      if (r_dest.IsFloat()) {
+        DCHECK(r_dest.IsDouble());
+        opcode = FWIDE(kA64Ldr3fXD);
+        alt_opcode = FWIDE(kA64Ldur3fXd);
+      } else {
+        opcode = FWIDE(kA64Ldr3rXD);
+        alt_opcode = FWIDE(kA64Ldur3rXd);
+      }
+      break;
+    case kSingle:     // Intentional fall-through.
+    case k32:         // Intentional fall-through.
+    case kReference:
+      scale = 2;
+      if (r_dest.IsFloat()) {
+        DCHECK(r_dest.IsSingle());
+        opcode = kA64Ldr3fXD;
+      } else {
+        opcode = kA64Ldr3rXD;
+      }
+      break;
+    case kUnsignedHalf:
+      scale = 1;
+      opcode = kA64Ldrh3wXF;
+      break;
+    case kSignedHalf:
+      scale = 1;
+      opcode = kA64Ldrsh3rXF;
+      break;
+    case kUnsignedByte:
+      opcode = kA64Ldrb3wXd;
+      break;
+    case kSignedByte:
+      opcode = kA64Ldrsb3rXd;
+      break;
+    default:
+      LOG(FATAL) << "Bad size: " << size;
+  }
+
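+  // Illustrative: for a k64 access, displacement 0x10 uses the scaled form (imm12 = 2),
+  // displacement -8 fits the unscaled (ldur) form, and anything else materializes the
+  // offset in a temp register and uses a register-offset load.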
+  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
+  int scaled_disp = displacement >> scale;
+  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
+    // Can use scaled load.
+    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), scaled_disp);
+  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
+    // Can use unscaled load.
+    load = NewLIR3(alt_opcode, r_dest.GetReg(), r_base.GetReg(), displacement);
+  } else {
+    // Use long sequence.
+    RegStorage r_scratch = AllocTemp();
+    LoadConstant(r_scratch, displacement);
+    load = LoadBaseIndexed(r_base, r_scratch, r_dest, 0, size);
+    FreeTemp(r_scratch);
+  }
+
+  // TODO: in future may need to differentiate Dalvik accesses w/ spills
+  if (r_base == rs_rA64_SP) {
+    AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
+  }
+  return load;
+}
+
+LIR* Arm64Mir2Lir::LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest,
+                                        OpSize size) {
+  // LoadBaseDisp() will emit correct insn for atomic load on arm64
+  // assuming r_dest is correctly prepared using RegClassForFieldLoadStore().
+  return LoadBaseDisp(r_base, displacement, r_dest, size);
+}
+
+LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+                                OpSize size) {
+  return LoadBaseDispBody(r_base, displacement, r_dest, size);
+}
+
+
+LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
+                                     OpSize size) {
+  LIR* store = NULL;
+  ArmOpcode opcode = kA64Brk1d;
+  ArmOpcode alt_opcode = kA64Brk1d;
+  int scale = 0;
+
+  switch (size) {
+    case kDouble:     // Intentional fall-through.
+    case kWord:       // Intentional fall-through.
+    case k64:
+      scale = 3;
+      if (r_src.IsFloat()) {
+        DCHECK(r_src.IsDouble());
+        opcode = FWIDE(kA64Str3fXD);
+        alt_opcode = FWIDE(kA64Stur3fXd);
+      } else {
+        opcode = FWIDE(kA64Str3rXD);
+        alt_opcode = FWIDE(kA64Stur3rXd);
+      }
+      break;
+    case kSingle:     // Intentional fall-through.
+    case k32:         // Intentional fall-through.
+    case kReference:
+      scale = 2;
+      if (r_src.IsFloat()) {
+        DCHECK(r_src.IsSingle());
+        opcode = kA64Str3fXD;
+      } else {
+        opcode = kA64Str3rXD;
+      }
+      break;
+    case kUnsignedHalf:
+    case kSignedHalf:
+      scale = 1;
+      opcode = kA64Strh3wXF;
+      break;
+    case kUnsignedByte:
+    case kSignedByte:
+      opcode = kA64Strb3wXd;
+      break;
+    default:
+      LOG(FATAL) << "Bad size: " << size;
+  }
+
+  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
+  int scaled_disp = displacement >> scale;
+  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
+    // Can use scaled store.
+    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), scaled_disp);
+  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
+    // Can use unscaled store.
+    store = NewLIR3(alt_opcode, r_src.GetReg(), r_base.GetReg(), displacement);
+  } else {
+    // Use long sequence.
+    RegStorage r_scratch = AllocTemp();
+    LoadConstant(r_scratch, displacement);
+    store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
+    FreeTemp(r_scratch);
+  }
+
+  // TODO: In future, may need to differentiate Dalvik & spill accesses.
+  if (r_base == rs_rA64_SP) {
+    AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
+  }
+  return store;
+}
+
+LIR* Arm64Mir2Lir::StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src,
+                                         OpSize size) {
+  // StoreBaseDisp() will emit correct insn for atomic store on arm64
+  // assuming r_dest is correctly prepared using RegClassForFieldLoadStore().
+  return StoreBaseDisp(r_base, displacement, r_src, size);
+}
+
+LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+                                 OpSize size) {
+  return StoreBaseDispBody(r_base, displacement, r_src, size);
+}
+
+LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
+  LOG(FATAL) << "Unexpected use of OpFpRegCopy for Arm64";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) {
+  UNIMPLEMENTED(FATAL) << "Should not be used.";
+  return nullptr;
+}
+
+LIR* Arm64Mir2Lir::OpThreadMem(OpKind op, ThreadOffset<8> thread_offset) {
+  LOG(FATAL) << "Unexpected use of OpThreadMem for Arm64";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
+  LOG(FATAL) << "Unexpected use of OpMem for Arm64";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
+                                        int displacement, RegStorage r_src, OpSize size) {
+  LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for Arm64";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset) {
+  LOG(FATAL) << "Unexpected use of OpRegMem for Arm64";
+  return NULL;
+}
+
+LIR* Arm64Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
+                                       int displacement, RegStorage r_dest, OpSize size) {
+  LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for Arm64";
+  return NULL;
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 0596d4f..256135d 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -254,7 +254,7 @@
     PromotionMap v_reg_map = promotion_map_[i];
     std::string buf;
     if (v_reg_map.fp_location == kLocPhysReg) {
-      StringAppendF(&buf, " : s%d", v_reg_map.FpReg & FpRegMask());
+      StringAppendF(&buf, " : s%d", RegStorage::RegNum(v_reg_map.FpReg));
     }
 
     std::string buf3;
@@ -364,6 +364,18 @@
   return NULL;
 }
 
+/* Search the existing constants in the literal pool for an exact method match */
+LIR* Mir2Lir::ScanLiteralPoolMethod(LIR* data_target, const MethodReference& method) {
+  while (data_target) {
+    if (static_cast<uint32_t>(data_target->operands[0]) == method.dex_method_index &&
+        UnwrapPointer(data_target->operands[1]) == method.dex_file) {
+      return data_target;
+    }
+    data_target = data_target->next;
+  }
+  return nullptr;
+}
+
 /*
  * The following are building blocks to insert constants into the pool or
  * instruction streams.
@@ -497,6 +509,7 @@
       case kX86_64:
         bx_offset = 0;
         break;
+      case kArm64:
       case kMips:
         bx_offset = tab_rec->anchor->offset;
         break;
@@ -558,7 +571,7 @@
 static int AssignLiteralPointerOffsetCommon(LIR* lir, CodeOffset offset,
                                             unsigned int element_size) {
   // Align to natural pointer size.
-  offset = (offset + (element_size - 1)) & ~(element_size - 1);
+  offset = RoundUp(offset, element_size);
   for (; lir != NULL; lir = lir->next) {
     lir->offset = offset;
     offset += element_size;
@@ -758,7 +771,7 @@
     tab_rec->offset = offset;
     offset += tab_rec->size;
     // word align
-    offset = (offset + 3) & ~3;
+    offset = RoundUp(offset, 4);
     }
   return offset;
 }
@@ -942,7 +955,7 @@
       switch_tables_(arena, 4, kGrowableArraySwitchTables),
       fill_array_data_(arena, 4, kGrowableArrayFillArrayData),
       tempreg_info_(arena, 20, kGrowableArrayMisc),
-      reginfo_map_(arena, 64, kGrowableArrayMisc),
+      reginfo_map_(arena, RegStorage::kMaxRegs, kGrowableArrayMisc),
       pointer_storage_(arena, 128, kGrowableArrayMisc),
       data_offset_(0),
       total_size_(0),
@@ -990,7 +1003,7 @@
     /* Convert LIR into machine code. */
     AssembleLIR();
 
-    if (cu_->verbose) {
+    if ((cu_->enable_debug & (1 << kDebugCodegenDump)) != 0) {
       CodegenDump();
     }
   }
@@ -1025,9 +1038,9 @@
     vmap_encoder.PushBackUnsigned(0u);  // Size is 0.
   }
 
-  UniquePtr<std::vector<uint8_t> > cfi_info(ReturnCallFrameInformation());
+  std::unique_ptr<std::vector<uint8_t>> cfi_info(ReturnCallFrameInformation());
   CompiledMethod* result =
-      new CompiledMethod(*cu_->compiler_driver, cu_->instruction_set, code_buffer_, frame_size_,
+      new CompiledMethod(cu_->compiler_driver, cu_->instruction_set, code_buffer_, frame_size_,
                          core_spill_mask_, fp_spill_mask_, encoded_mapping_table_,
                          vmap_encoder.GetData(), native_gc_map_, cfi_info.get());
   return result;
@@ -1049,13 +1062,13 @@
 
 int Mir2Lir::ComputeFrameSize() {
   /* Figure out the frame size */
-  static const uint32_t kAlignMask = kStackAlignment - 1;
-  uint32_t size = ((num_core_spills_ + num_fp_spills_ +
-                   1 /* filler word */ + cu_->num_regs + cu_->num_outs)
-                   * sizeof(uint32_t)) +
-                   GetNumBytesForCompilerTempSpillRegion();
+  uint32_t size = num_core_spills_ * GetBytesPerGprSpillLocation(cu_->instruction_set)
+                  + num_fp_spills_ * GetBytesPerFprSpillLocation(cu_->instruction_set)
+                  + sizeof(uint32_t)  // Filler.
+                  + (cu_->num_regs + cu_->num_outs) * sizeof(uint32_t)
+                  + GetNumBytesForCompilerTempSpillRegion();
   /* Align and set */
-  return (size + kAlignMask) & ~(kAlignMask);
+  return RoundUp(size, kStackAlignment);
 }
 
 /*
@@ -1142,11 +1155,13 @@
 
 void Mir2Lir::LoadCodeAddress(const MethodReference& target_method, InvokeType type,
                               SpecialTargetRegister symbolic_reg) {
-  int target_method_idx = target_method.dex_method_index;
-  LIR* data_target = ScanLiteralPool(code_literal_list_, target_method_idx, 0);
+  LIR* data_target = ScanLiteralPoolMethod(code_literal_list_, target_method);
   if (data_target == NULL) {
-    data_target = AddWordData(&code_literal_list_, target_method_idx);
+    data_target = AddWordData(&code_literal_list_, target_method.dex_method_index);
     data_target->operands[1] = WrapPointer(const_cast<DexFile*>(target_method.dex_file));
+    // NOTE: The invoke type doesn't contribute to the literal identity. In fact, we can have
+    // the same method invoked with kVirtual, kSuper and kInterface but the class linker will
+    // resolve these invokes to the same method, so we don't care which one we record here.
     data_target->operands[2] = type;
   }
   LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target);
@@ -1156,11 +1171,13 @@
 
 void Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
                                 SpecialTargetRegister symbolic_reg) {
-  int target_method_idx = target_method.dex_method_index;
-  LIR* data_target = ScanLiteralPool(method_literal_list_, target_method_idx, 0);
+  LIR* data_target = ScanLiteralPoolMethod(method_literal_list_, target_method);
   if (data_target == NULL) {
-    data_target = AddWordData(&method_literal_list_, target_method_idx);
+    data_target = AddWordData(&method_literal_list_, target_method.dex_method_index);
     data_target->operands[1] = WrapPointer(const_cast<DexFile*>(target_method.dex_file));
+    // NOTE: The invoke type doesn't contribute to the literal identity. In fact, we can have
+    // the same method invoked with kVirtual, kSuper and kInterface but the class linker will
+    // resolve these invokes to the same method, so we don't care which one we record here.
     data_target->operands[2] = type;
   }
   LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target);
@@ -1185,10 +1202,25 @@
 
 RegLocation Mir2Lir::NarrowRegLoc(RegLocation loc) {
   loc.wide = false;
-  if (loc.reg.IsPair()) {
-    loc.reg = loc.reg.GetLow();
+  if (loc.location == kLocPhysReg) {
+    if (loc.reg.IsPair()) {
+      loc.reg = loc.reg.GetLow();
+    } else {
+      // FIXME: temp workaround.
+      // Issue here: how do we narrow to a 32-bit value in 64-bit container?
+      // Probably the wrong thing to narrow the RegStorage container here.  That
+      // should be a target decision.  At the RegLocation level, we're only
+      // modifying the view of the Dalvik value - this is orthogonal to the storage
+      // container size.  Consider this a temp workaround.
+      DCHECK(loc.reg.IsDouble());
+      loc.reg = loc.reg.DoubleToLowSingle();
+    }
   }
   return loc;
 }
 
+void Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
+  LOG(FATAL) << "Unknown MIR opcode not supported on this architecture";
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 3ec31ba..526c981 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -35,15 +35,9 @@
 namespace {  // anonymous namespace
 
 MIR* AllocReplacementMIR(MIRGraph* mir_graph, MIR* invoke, MIR* move_return) {
-  ArenaAllocator* arena = mir_graph->GetArena();
-  MIR* insn = static_cast<MIR*>(arena->Alloc(sizeof(MIR), kArenaAllocMIR));
+  MIR* insn = mir_graph->NewMIR();
   insn->offset = invoke->offset;
-  insn->width = invoke->width;
   insn->optimization_flags = MIR_CALLEE;
-  if (move_return != nullptr) {
-    DCHECK_EQ(move_return->offset, invoke->offset + invoke->width);
-    insn->width += move_return->width;
-  }
   return insn;
 }
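AllocReplacementMIR above now obtains its MIR from mir_graph->NewMIR() instead of open-coding the arena allocation, and no longer tracks per-instruction widths. The following is a minimal sketch of that factory shape only, with hypothetical Arena, Graph and Node names standing in for the ART types.

#include <cstddef>
#include <cstdio>
#include <new>
#include <vector>

// Hypothetical stand-ins for the arena and instruction node types.
struct Node {
  int offset = 0;
  int flags = 0;
};

class Arena {
 public:
  void* Alloc(std::size_t bytes) {
    storage_.emplace_back(bytes);  // one zero-filled buffer per allocation (sketch only)
    return storage_.back().data();
  }

 private:
  std::vector<std::vector<char>> storage_;
};

class Graph {
 public:
  // A single factory keeps allocation and zero-initialization together,
  // instead of open-coded arena Alloc calls at every call site.
  Node* NewNode() { return new (arena_.Alloc(sizeof(Node))) Node(); }

 private:
  Arena arena_;
};

int main() {
  Graph graph;
  Node* replacement = graph.NewNode();
  replacement->offset = 42;  // the caller fills in only what it needs
  std::printf("node offset %d flags %d\n", replacement->offset, replacement->flags);
  return 0;
}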
 
@@ -660,7 +654,6 @@
   }
 
   MIR* insn = AllocReplacementMIR(mir_graph, invoke, move_result);
-  insn->width += insn->offset - invoke->offset;
   insn->offset = invoke->offset;
   insn->dalvikInsn.opcode = opcode;
   insn->dalvikInsn.vA = move_result->dalvikInsn.vA;
@@ -737,9 +730,7 @@
 
   if (move_result != nullptr) {
     MIR* move = AllocReplacementMIR(mir_graph, invoke, move_result);
-    insn->width = invoke->width;
     move->offset = move_result->offset;
-    move->width = move_result->width;
     if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT) {
       move->dalvikInsn.opcode = Instruction::MOVE_FROM16;
     } else if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) {
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 8b9a686..7e3c8ce 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -73,7 +73,11 @@
       m2l_->ResetRegPool();
       m2l_->ResetDefTracking();
       GenerateTargetLabel(kPseudoThrowTarget);
-      m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(4, pThrowDivZero), true);
+      if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) {
+        m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(8, pThrowDivZero), true);
+      } else {
+        m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(4, pThrowDivZero), true);
+      }
     }
   };
 
@@ -92,8 +96,13 @@
       m2l_->ResetRegPool();
       m2l_->ResetDefTracking();
       GenerateTargetLabel(kPseudoThrowTarget);
-      m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
-                                    index_, length_, true);
+      if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
+                                      index_, length_, true);
+      } else {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
+                                      index_, length_, true);
+      }
     }
 
    private:
@@ -120,8 +129,13 @@
 
       m2l_->OpRegCopy(m2l_->TargetReg(kArg1), length_);
       m2l_->LoadConstant(m2l_->TargetReg(kArg0), index_);
-      m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
-                                    m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
+      if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
+                                      m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
+      } else {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
+                                      m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
+      }
     }
 
    private:
@@ -144,7 +158,11 @@
       m2l_->ResetRegPool();
       m2l_->ResetDefTracking();
       GenerateTargetLabel(kPseudoThrowTarget);
-      m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(4, pThrowNullPointer), true);
+      if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) {
+        m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(8, pThrowNullPointer), true);
+      } else {
+        m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(4, pThrowNullPointer), true);
+      }
     }
   };
 
@@ -314,6 +332,49 @@
   StoreValue(rl_dest, rl_result);
 }
 
+template <size_t pointer_size>
+static void GenNewArrayImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu,
+                            uint32_t type_idx, RegLocation rl_dest,
+                            RegLocation rl_src) {
+  mir_to_lir->FlushAllRegs();  /* Everything to home location */
+  ThreadOffset<pointer_size> func_offset(-1);
+  const DexFile* dex_file = cu->dex_file;
+  CompilerDriver* driver = cu->compiler_driver;
+  if (cu->compiler_driver->CanAccessTypeWithoutChecks(cu->method_idx, *dex_file,
+                                                      type_idx)) {
+    bool is_type_initialized;  // Ignored as an array does not have an initializer.
+    bool use_direct_type_ptr;
+    uintptr_t direct_type_ptr;
+    bool is_finalizable;
+    if (kEmbedClassInCode &&
+        driver->CanEmbedTypeInCode(*dex_file, type_idx, &is_type_initialized, &use_direct_type_ptr,
+                                   &direct_type_ptr, &is_finalizable)) {
+      // The fast path.
+      if (!use_direct_type_ptr) {
+        mir_to_lir->LoadClassType(type_idx, kArg0);
+        func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocArrayResolved);
+        mir_to_lir->CallRuntimeHelperRegMethodRegLocation(func_offset, mir_to_lir->TargetReg(kArg0),
+                                                          rl_src, true);
+      } else {
+        // Use the direct pointer.
+        func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocArrayResolved);
+        mir_to_lir->CallRuntimeHelperImmMethodRegLocation(func_offset, direct_type_ptr, rl_src,
+                                                          true);
+      }
+    } else {
+      // The slow path.
+      func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocArray);
+      mir_to_lir->CallRuntimeHelperImmMethodRegLocation(func_offset, type_idx, rl_src, true);
+    }
+    DCHECK_NE(func_offset.Int32Value(), -1);
+  } else {
+    func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocArrayWithAccessCheck);
+    mir_to_lir->CallRuntimeHelperImmMethodRegLocation(func_offset, type_idx, rl_src, true);
+  }
+  RegLocation rl_result = mir_to_lir->GetReturn(false);
+  mir_to_lir->StoreValue(rl_dest, rl_result);
+}
+
 /*
  * Let helper function take care of everything.  Will call
  * Array::AllocFromCode(type_idx, method, count);
@@ -321,41 +382,23 @@
  */
 void Mir2Lir::GenNewArray(uint32_t type_idx, RegLocation rl_dest,
                           RegLocation rl_src) {
-  FlushAllRegs();  /* Everything to home location */
-  ThreadOffset<4> func_offset(-1);
-  const DexFile* dex_file = cu_->dex_file;
-  CompilerDriver* driver = cu_->compiler_driver;
-  if (cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *dex_file,
-                                                       type_idx)) {
-    bool is_type_initialized;  // Ignored as an array does not have an initializer.
-    bool use_direct_type_ptr;
-    uintptr_t direct_type_ptr;
-    if (kEmbedClassInCode &&
-        driver->CanEmbedTypeInCode(*dex_file, type_idx,
-                                   &is_type_initialized, &use_direct_type_ptr, &direct_type_ptr)) {
-      // The fast path.
-      if (!use_direct_type_ptr) {
-        LoadClassType(type_idx, kArg0);
-        func_offset = QUICK_ENTRYPOINT_OFFSET(4, pAllocArrayResolved);
-        CallRuntimeHelperRegMethodRegLocation(func_offset, TargetReg(kArg0), rl_src, true);
-      } else {
-        // Use the direct pointer.
-        func_offset = QUICK_ENTRYPOINT_OFFSET(4, pAllocArrayResolved);
-        CallRuntimeHelperImmMethodRegLocation(func_offset, direct_type_ptr, rl_src, true);
-      }
-    } else {
-      // The slow path.
-      DCHECK_EQ(func_offset.Int32Value(), -1);
-      func_offset = QUICK_ENTRYPOINT_OFFSET(4, pAllocArray);
-      CallRuntimeHelperImmMethodRegLocation(func_offset, type_idx, rl_src, true);
-    }
-    DCHECK_NE(func_offset.Int32Value(), -1);
+  if (Is64BitInstructionSet(cu_->instruction_set)) {
+    GenNewArrayImpl<8>(this, cu_, type_idx, rl_dest, rl_src);
   } else {
-    func_offset= QUICK_ENTRYPOINT_OFFSET(4, pAllocArrayWithAccessCheck);
-    CallRuntimeHelperImmMethodRegLocation(func_offset, type_idx, rl_src, true);
+    GenNewArrayImpl<4>(this, cu_, type_idx, rl_dest, rl_src);
   }
-  RegLocation rl_result = GetReturn(false);
-  StoreValue(rl_dest, rl_result);
+}
+
+template <size_t pointer_size>
+static void GenFilledNewArrayCall(Mir2Lir* mir_to_lir, CompilationUnit* cu, int elems, int type_idx) {
+  ThreadOffset<pointer_size> func_offset(-1);
+  if (cu->compiler_driver->CanAccessTypeWithoutChecks(cu->method_idx, *cu->dex_file,
+                                                      type_idx)) {
+    func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pCheckAndAllocArray);
+  } else {
+    func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pCheckAndAllocArrayWithAccessCheck);
+  }
+  mir_to_lir->CallRuntimeHelperImmMethodImm(func_offset, type_idx, elems, true);
 }
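The pattern introduced here, and repeated through the rest of gen_common.cc, is to hoist the body into a static helper templated on the entrypoint pointer size and have the member function pick the <4> or <8> instantiation from Is64BitInstructionSet(). Below is a minimal standalone sketch of that shape with hypothetical Offset, Helper and Generate names; it is not the ART implementation.

#include <cstddef>
#include <cstdio>

// Stand-in for ThreadOffset<pointer_size>: an offset into a per-thread
// entrypoint table whose slot width depends on the target word size.
template <std::size_t pointer_size>
struct Offset {
  explicit Offset(int index) : value(index * static_cast<int>(pointer_size)) {}
  int value;
};

template <std::size_t pointer_size>
static void Helper(int entrypoint_index) {
  Offset<pointer_size> off(entrypoint_index);
  std::printf("call entrypoint at offset %d (%zu-byte slots)\n", off.value, pointer_size);
}

// The non-template entry point dispatches once on the target word size,
// mirroring the Is64BitInstructionSet() checks in the patch.
void Generate(bool is_64bit, int entrypoint_index) {
  if (is_64bit) {
    Helper<8>(entrypoint_index);
  } else {
    Helper<4>(entrypoint_index);
  }
}

int main() {
  Generate(false, 3);  // 4-byte slots -> offset 12
  Generate(true, 3);   // 8-byte slots -> offset 24
  return 0;
}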
 
 /*
@@ -368,14 +411,11 @@
   int elems = info->num_arg_words;
   int type_idx = info->index;
   FlushAllRegs();  /* Everything to home location */
-  ThreadOffset<4> func_offset(-1);
-  if (cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file,
-                                                       type_idx)) {
-    func_offset = QUICK_ENTRYPOINT_OFFSET(4, pCheckAndAllocArray);
+  if (Is64BitInstructionSet(cu_->instruction_set)) {
+    GenFilledNewArrayCall<8>(this, cu_, elems, type_idx);
   } else {
-    func_offset = QUICK_ENTRYPOINT_OFFSET(4, pCheckAndAllocArrayWithAccessCheck);
+    GenFilledNewArrayCall<4>(this, cu_, elems, type_idx);
   }
-  CallRuntimeHelperImmMethodImm(func_offset, type_idx, elems, true);
   FreeTemp(TargetReg(kArg2));
   FreeTemp(TargetReg(kArg1));
   /*
@@ -481,8 +521,13 @@
   void Compile() {
     LIR* unresolved_target = GenerateTargetLabel();
     uninit_->target = unresolved_target;
-    m2l_->CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeStaticStorage),
-                               storage_index_, true);
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      m2l_->CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeStaticStorage),
+                                 storage_index_, true);
+    } else {
+      m2l_->CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeStaticStorage),
+                                 storage_index_, true);
+    }
     // Copy helper's result into r_base, a no-op on all but MIPS.
     m2l_->OpRegCopy(r_base_,  m2l_->TargetReg(kRet0));
 
@@ -495,11 +540,24 @@
   const RegStorage r_base_;
 };
 
+template <size_t pointer_size>
+static void GenSputCall(Mir2Lir* mir_to_lir, bool is_long_or_double, bool is_object,
+                        const MirSFieldLoweringInfo* field_info, RegLocation rl_src) {
+  ThreadOffset<pointer_size> setter_offset =
+      is_long_or_double ? QUICK_ENTRYPOINT_OFFSET(pointer_size, pSet64Static)
+          : (is_object ? QUICK_ENTRYPOINT_OFFSET(pointer_size, pSetObjStatic)
+              : QUICK_ENTRYPOINT_OFFSET(pointer_size, pSet32Static));
+  mir_to_lir->CallRuntimeHelperImmRegLocation(setter_offset, field_info->FieldIndex(), rl_src,
+                                              true);
+}
+
 void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, bool is_long_or_double,
                       bool is_object) {
   const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
   cu_->compiler_driver->ProcessedStaticField(field_info.FastPut(), field_info.IsReferrersClass());
-  if (field_info.FastPut() && !SLOW_FIELD_PATH) {
+  OpSize store_size = LoadStoreOpSize(is_long_or_double, is_object);
+  if (!SLOW_FIELD_PATH && field_info.FastPut() &&
+      (!field_info.IsVolatile() || SupportsVolatileLoadStore(store_size))) {
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
     RegStorage r_base;
     if (field_info.IsReferrersClass()) {
@@ -549,30 +607,20 @@
       FreeTemp(r_method);
     }
     // rBase now holds static storage base
+    RegisterClass reg_class = RegClassForFieldLoadStore(store_size, field_info.IsVolatile());
     if (is_long_or_double) {
-      RegisterClass register_kind = kAnyReg;
-      if (field_info.IsVolatile() && cu_->instruction_set == kX86) {
-        // Force long/double volatile stores into SSE registers to avoid tearing.
-        register_kind = kFPReg;
-      }
-      rl_src = LoadValueWide(rl_src, register_kind);
+      rl_src = LoadValueWide(rl_src, reg_class);
     } else {
-      rl_src = LoadValue(rl_src, kAnyReg);
+      rl_src = LoadValue(rl_src, reg_class);
     }
     if (field_info.IsVolatile()) {
       // There might have been a store before this volatile one so insert StoreStore barrier.
       GenMemBarrier(kStoreStore);
-    }
-    if (is_long_or_double) {
-      StoreBaseDispWide(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg);
-    } else if (rl_src.ref) {
-      StoreRefDisp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg);
-    } else {
-      Store32Disp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg);
-    }
-    if (field_info.IsVolatile()) {
+      StoreBaseDispVolatile(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg, store_size);
       // A load might follow the volatile store so insert a StoreLoad barrier.
       GenMemBarrier(kStoreLoad);
+    } else {
+      StoreBaseDisp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg, store_size);
     }
     if (is_object && !mir_graph_->IsConstantNullRef(rl_src)) {
       MarkGCCard(rl_src.reg, r_base);
@@ -580,19 +628,31 @@
     FreeTemp(r_base);
   } else {
     FlushAllRegs();  // Everything to home locations
-    ThreadOffset<4> setter_offset =
-        is_long_or_double ? QUICK_ENTRYPOINT_OFFSET(4, pSet64Static)
-                          : (is_object ? QUICK_ENTRYPOINT_OFFSET(4, pSetObjStatic)
-                                       : QUICK_ENTRYPOINT_OFFSET(4, pSet32Static));
-    CallRuntimeHelperImmRegLocation(setter_offset, field_info.FieldIndex(), rl_src, true);
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      GenSputCall<8>(this, is_long_or_double, is_object, &field_info, rl_src);
+    } else {
+      GenSputCall<4>(this, is_long_or_double, is_object, &field_info, rl_src);
+    }
   }
 }
 
+template <size_t pointer_size>
+static void GenSgetCall(Mir2Lir* mir_to_lir, bool is_long_or_double, bool is_object,
+                        const MirSFieldLoweringInfo* field_info) {
+  ThreadOffset<pointer_size> getter_offset =
+      is_long_or_double ? QUICK_ENTRYPOINT_OFFSET(pointer_size, pGet64Static)
+          : (is_object ? QUICK_ENTRYPOINT_OFFSET(pointer_size, pGetObjStatic)
+              : QUICK_ENTRYPOINT_OFFSET(pointer_size, pGet32Static));
+  mir_to_lir->CallRuntimeHelperImm(getter_offset, field_info->FieldIndex(), true);
+}
+
 void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest,
                       bool is_long_or_double, bool is_object) {
   const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
   cu_->compiler_driver->ProcessedStaticField(field_info.FastGet(), field_info.IsReferrersClass());
-  if (field_info.FastGet() && !SLOW_FIELD_PATH) {
+  OpSize load_size = LoadStoreOpSize(is_long_or_double, is_object);
+  if (!SLOW_FIELD_PATH && field_info.FastGet() &&
+      (!field_info.IsVolatile() || SupportsVolatileLoadStore(load_size))) {
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
     RegStorage r_base;
     if (field_info.IsReferrersClass()) {
@@ -638,28 +698,20 @@
       FreeTemp(r_method);
     }
     // r_base now holds static storage base
-    RegisterClass result_reg_kind = kAnyReg;
-    if (field_info.IsVolatile() && cu_->instruction_set == kX86) {
-      // Force long/double volatile loads into SSE registers to avoid tearing.
-      result_reg_kind = kFPReg;
-    }
-    RegLocation rl_result = EvalLoc(rl_dest, result_reg_kind, true);
+    RegisterClass reg_class = RegClassForFieldLoadStore(load_size, field_info.IsVolatile());
+    RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
 
-    if (is_long_or_double) {
-      LoadBaseDispWide(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg, INVALID_SREG);
-    } else if (rl_result.ref) {
-      LoadRefDisp(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg);
-    } else {
-      Load32Disp(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg);
-    }
-    FreeTemp(r_base);
-
+    int field_offset = field_info.FieldOffset().Int32Value();
     if (field_info.IsVolatile()) {
+      LoadBaseDispVolatile(r_base, field_offset, rl_result.reg, load_size);
       // Without context sensitive analysis, we must issue the most conservative barriers.
       // In this case, either a load or store may follow so we issue both barriers.
       GenMemBarrier(kLoadLoad);
       GenMemBarrier(kLoadStore);
+    } else {
+      LoadBaseDisp(r_base, field_offset, rl_result.reg, load_size);
     }
+    FreeTemp(r_base);
 
     if (is_long_or_double) {
       StoreValueWide(rl_dest, rl_result);
@@ -668,11 +720,11 @@
     }
   } else {
     FlushAllRegs();  // Everything to home locations
-    ThreadOffset<4> getterOffset =
-        is_long_or_double ? QUICK_ENTRYPOINT_OFFSET(4, pGet64Static)
-                          :(is_object ? QUICK_ENTRYPOINT_OFFSET(4, pGetObjStatic)
-                                      : QUICK_ENTRYPOINT_OFFSET(4, pGet32Static));
-    CallRuntimeHelperImm(getterOffset, field_info.FieldIndex(), true);
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      GenSgetCall<8>(this, is_long_or_double, is_object, &field_info);
+    } else {
+      GenSgetCall<4>(this, is_long_or_double, is_object, &field_info);
+    }
     if (is_long_or_double) {
       RegLocation rl_result = GetReturnWide(rl_dest.fp);
       StoreValueWide(rl_dest, rl_result);
@@ -693,70 +745,53 @@
   slow_paths_.Reset();
 }
 
+template <size_t pointer_size>
+static void GenIgetCall(Mir2Lir* mir_to_lir, bool is_long_or_double, bool is_object,
+                        const MirIFieldLoweringInfo* field_info, RegLocation rl_obj) {
+  ThreadOffset<pointer_size> getter_offset =
+      is_long_or_double ? QUICK_ENTRYPOINT_OFFSET(pointer_size, pGet64Instance)
+          : (is_object ? QUICK_ENTRYPOINT_OFFSET(pointer_size, pGetObjInstance)
+              : QUICK_ENTRYPOINT_OFFSET(pointer_size, pGet32Instance));
+  mir_to_lir->CallRuntimeHelperImmRegLocation(getter_offset, field_info->FieldIndex(), rl_obj,
+                                              true);
+}
+
 void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size,
                       RegLocation rl_dest, RegLocation rl_obj, bool is_long_or_double,
                       bool is_object) {
   const MirIFieldLoweringInfo& field_info = mir_graph_->GetIFieldLoweringInfo(mir);
   cu_->compiler_driver->ProcessedInstanceField(field_info.FastGet());
-  if (field_info.FastGet() && !SLOW_FIELD_PATH) {
-    RegLocation rl_result;
-    RegisterClass reg_class = oat_reg_class_by_size(size);
+  OpSize load_size = LoadStoreOpSize(is_long_or_double, is_object);
+  if (!SLOW_FIELD_PATH && field_info.FastGet() &&
+      (!field_info.IsVolatile() || SupportsVolatileLoadStore(load_size))) {
+    RegisterClass reg_class = RegClassForFieldLoadStore(load_size, field_info.IsVolatile());
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
     rl_obj = LoadValue(rl_obj, kCoreReg);
+    GenNullCheck(rl_obj.reg, opt_flags);
+    RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
+    int field_offset = field_info.FieldOffset().Int32Value();
+    if (field_info.IsVolatile()) {
+      LoadBaseDispVolatile(rl_obj.reg, field_offset, rl_result.reg, load_size);
+      MarkPossibleNullPointerException(opt_flags);
+      // Without context sensitive analysis, we must issue the most conservative barriers.
+      // In this case, either a load or store may follow so we issue both barriers.
+      GenMemBarrier(kLoadLoad);
+      GenMemBarrier(kLoadStore);
+    } else {
+      LoadBaseDisp(rl_obj.reg, field_offset, rl_result.reg, load_size);
+      MarkPossibleNullPointerException(opt_flags);
+    }
     if (is_long_or_double) {
-      DCHECK(rl_dest.wide);
-      GenNullCheck(rl_obj.reg, opt_flags);
-      if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
-        RegisterClass result_reg_kind = kAnyReg;
-        if (field_info.IsVolatile() && cu_->instruction_set == kX86) {
-          // Force long/double volatile loads into SSE registers to avoid tearing.
-          result_reg_kind = kFPReg;
-        }
-        rl_result = EvalLoc(rl_dest, result_reg_kind, true);
-        LoadBaseDispWide(rl_obj.reg, field_info.FieldOffset().Int32Value(), rl_result.reg,
-                         rl_obj.s_reg_low);
-        MarkPossibleNullPointerException(opt_flags);
-        if (field_info.IsVolatile()) {
-          // Without context sensitive analysis, we must issue the most conservative barriers.
-          // In this case, either a load or store may follow so we issue both barriers.
-          GenMemBarrier(kLoadLoad);
-          GenMemBarrier(kLoadStore);
-        }
-      } else {
-        RegStorage reg_ptr = AllocTemp();
-        OpRegRegImm(kOpAdd, reg_ptr, rl_obj.reg, field_info.FieldOffset().Int32Value());
-        rl_result = EvalLoc(rl_dest, reg_class, true);
-        LoadBaseDispWide(reg_ptr, 0, rl_result.reg, INVALID_SREG);
-        MarkPossibleNullPointerException(opt_flags);
-        if (field_info.IsVolatile()) {
-          // Without context sensitive analysis, we must issue the most conservative barriers.
-          // In this case, either a load or store may follow so we issue both barriers.
-          GenMemBarrier(kLoadLoad);
-          GenMemBarrier(kLoadStore);
-        }
-        FreeTemp(reg_ptr);
-      }
       StoreValueWide(rl_dest, rl_result);
     } else {
-      rl_result = EvalLoc(rl_dest, reg_class, true);
-      GenNullCheck(rl_obj.reg, opt_flags);
-      LoadBaseDisp(rl_obj.reg, field_info.FieldOffset().Int32Value(), rl_result.reg, k32,
-                   rl_obj.s_reg_low);
-      MarkPossibleNullPointerException(opt_flags);
-      if (field_info.IsVolatile()) {
-        // Without context sensitive analysis, we must issue the most conservative barriers.
-        // In this case, either a load or store may follow so we issue both barriers.
-        GenMemBarrier(kLoadLoad);
-        GenMemBarrier(kLoadStore);
-      }
       StoreValue(rl_dest, rl_result);
     }
   } else {
-    ThreadOffset<4> getterOffset =
-        is_long_or_double ? QUICK_ENTRYPOINT_OFFSET(4, pGet64Instance)
-                          : (is_object ? QUICK_ENTRYPOINT_OFFSET(4, pGetObjInstance)
-                                       : QUICK_ENTRYPOINT_OFFSET(4, pGet32Instance));
-    CallRuntimeHelperImmRegLocation(getterOffset, field_info.FieldIndex(), rl_obj, true);
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      GenIgetCall<8>(this, is_long_or_double, is_object, &field_info, rl_obj);
+    } else {
+      GenIgetCall<4>(this, is_long_or_double, is_object, &field_info, rl_obj);
+    }
     if (is_long_or_double) {
       RegLocation rl_result = GetReturnWide(rl_dest.fp);
       StoreValueWide(rl_dest, rl_result);
@@ -767,73 +802,80 @@
   }
 }
 
+template <size_t pointer_size>
+static void GenIputCall(Mir2Lir* mir_to_lir, bool is_long_or_double, bool is_object,
+                        const MirIFieldLoweringInfo* field_info, RegLocation rl_obj,
+                        RegLocation rl_src) {
+  ThreadOffset<pointer_size> setter_offset =
+      is_long_or_double ? QUICK_ENTRYPOINT_OFFSET(pointer_size, pSet64Instance)
+          : (is_object ? QUICK_ENTRYPOINT_OFFSET(pointer_size, pSetObjInstance)
+              : QUICK_ENTRYPOINT_OFFSET(pointer_size, pSet32Instance));
+  mir_to_lir->CallRuntimeHelperImmRegLocationRegLocation(setter_offset, field_info->FieldIndex(),
+                                                         rl_obj, rl_src, true);
+}
+
 void Mir2Lir::GenIPut(MIR* mir, int opt_flags, OpSize size,
                       RegLocation rl_src, RegLocation rl_obj, bool is_long_or_double,
                       bool is_object) {
   const MirIFieldLoweringInfo& field_info = mir_graph_->GetIFieldLoweringInfo(mir);
   cu_->compiler_driver->ProcessedInstanceField(field_info.FastPut());
-  if (field_info.FastPut() && !SLOW_FIELD_PATH) {
-    RegisterClass reg_class = oat_reg_class_by_size(size);
+  OpSize store_size = LoadStoreOpSize(is_long_or_double, is_object);
+  if (!SLOW_FIELD_PATH && field_info.FastPut() &&
+      (!field_info.IsVolatile() || SupportsVolatileLoadStore(store_size))) {
+    RegisterClass reg_class = RegClassForFieldLoadStore(store_size, field_info.IsVolatile());
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
     rl_obj = LoadValue(rl_obj, kCoreReg);
     if (is_long_or_double) {
-      RegisterClass src_reg_kind = kAnyReg;
-      if (field_info.IsVolatile() && cu_->instruction_set == kX86) {
-        // Force long/double volatile stores into SSE registers to avoid tearing.
-        src_reg_kind = kFPReg;
-      }
-      rl_src = LoadValueWide(rl_src, src_reg_kind);
-      GenNullCheck(rl_obj.reg, opt_flags);
-      RegStorage reg_ptr = AllocTemp();
-      OpRegRegImm(kOpAdd, reg_ptr, rl_obj.reg, field_info.FieldOffset().Int32Value());
-      if (field_info.IsVolatile()) {
-        // There might have been a store before this volatile one so insert StoreStore barrier.
-        GenMemBarrier(kStoreStore);
-      }
-      StoreBaseDispWide(reg_ptr, 0, rl_src.reg);
-      MarkPossibleNullPointerException(opt_flags);
-      if (field_info.IsVolatile()) {
-        // A load might follow the volatile store so insert a StoreLoad barrier.
-        GenMemBarrier(kStoreLoad);
-      }
-      FreeTemp(reg_ptr);
+      rl_src = LoadValueWide(rl_src, reg_class);
     } else {
       rl_src = LoadValue(rl_src, reg_class);
-      GenNullCheck(rl_obj.reg, opt_flags);
-      if (field_info.IsVolatile()) {
-        // There might have been a store before this volatile one so insert StoreStore barrier.
-        GenMemBarrier(kStoreStore);
-      }
-      Store32Disp(rl_obj.reg, field_info.FieldOffset().Int32Value(), rl_src.reg);
+    }
+    GenNullCheck(rl_obj.reg, opt_flags);
+    int field_offset = field_info.FieldOffset().Int32Value();
+    if (field_info.IsVolatile()) {
+      // There might have been a store before this volatile one so insert StoreStore barrier.
+      GenMemBarrier(kStoreStore);
+      StoreBaseDispVolatile(rl_obj.reg, field_offset, rl_src.reg, store_size);
       MarkPossibleNullPointerException(opt_flags);
-      if (field_info.IsVolatile()) {
-        // A load might follow the volatile store so insert a StoreLoad barrier.
-        GenMemBarrier(kStoreLoad);
-      }
-      if (is_object && !mir_graph_->IsConstantNullRef(rl_src)) {
-        MarkGCCard(rl_src.reg, rl_obj.reg);
-      }
+      // A load might follow the volatile store so insert a StoreLoad barrier.
+      GenMemBarrier(kStoreLoad);
+    } else {
+      StoreBaseDisp(rl_obj.reg, field_offset, rl_src.reg, store_size);
+      MarkPossibleNullPointerException(opt_flags);
+    }
+    if (is_object && !mir_graph_->IsConstantNullRef(rl_src)) {
+      MarkGCCard(rl_src.reg, rl_obj.reg);
     }
   } else {
-    ThreadOffset<4> setter_offset =
-        is_long_or_double ? QUICK_ENTRYPOINT_OFFSET(4, pSet64Instance)
-                          : (is_object ? QUICK_ENTRYPOINT_OFFSET(4, pSetObjInstance)
-                                       : QUICK_ENTRYPOINT_OFFSET(4, pSet32Instance));
-    CallRuntimeHelperImmRegLocationRegLocation(setter_offset, field_info.FieldIndex(),
-                                               rl_obj, rl_src, true);
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      GenIputCall<8>(this, is_long_or_double, is_object, &field_info, rl_obj, rl_src);
+    } else {
+      GenIputCall<4>(this, is_long_or_double, is_object, &field_info, rl_obj, rl_src);
+    }
   }
 }
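With the refactoring above, a volatile field store is bracketed by a StoreStore barrier before it and a StoreLoad barrier after it, while a volatile load is followed by both LoadLoad and LoadStore barriers, the conservative choice without context-sensitive analysis. Below is a minimal sketch of the emitted ordering only, with a hypothetical Emit() standing in for the LIR emitters.

#include <cstdio>

// Hypothetical stand-in for the code generator's barrier and memory-op emitters.
static void Emit(const char* what) { std::printf("  %s\n", what); }

static void GenVolatileStore() {
  Emit("StoreStore barrier");   // order earlier stores before the volatile store
  Emit("store field");
  Emit("StoreLoad barrier");    // a later load must not pass the volatile store
}

static void GenVolatileLoad() {
  Emit("load field");
  Emit("LoadLoad barrier");     // conservative: a load may follow
  Emit("LoadStore barrier");    // conservative: a store may follow
}

int main() {
  std::printf("volatile iput:\n");
  GenVolatileStore();
  std::printf("volatile iget:\n");
  GenVolatileLoad();
  return 0;
}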
 
+template <size_t pointer_size>
+static void GenArrayObjPutCall(Mir2Lir* mir_to_lir, bool needs_range_check, bool needs_null_check,
+                               RegLocation rl_array, RegLocation rl_index, RegLocation rl_src) {
+  ThreadOffset<pointer_size> helper = needs_range_check
+        ? (needs_null_check ? QUICK_ENTRYPOINT_OFFSET(pointer_size, pAputObjectWithNullAndBoundCheck)
+                            : QUICK_ENTRYPOINT_OFFSET(pointer_size, pAputObjectWithBoundCheck))
+        : QUICK_ENTRYPOINT_OFFSET(pointer_size, pAputObject);
+  mir_to_lir->CallRuntimeHelperRegLocationRegLocationRegLocation(helper, rl_array, rl_index, rl_src,
+                                                                 true);
+}
+
 void Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index,
                              RegLocation rl_src) {
   bool needs_range_check = !(opt_flags & MIR_IGNORE_RANGE_CHECK);
   bool needs_null_check = !((cu_->disable_opt & (1 << kNullCheckElimination)) &&
       (opt_flags & MIR_IGNORE_NULL_CHECK));
-  ThreadOffset<4> helper = needs_range_check
-      ? (needs_null_check ? QUICK_ENTRYPOINT_OFFSET(4, pAputObjectWithNullAndBoundCheck)
-                          : QUICK_ENTRYPOINT_OFFSET(4, pAputObjectWithBoundCheck))
-      : QUICK_ENTRYPOINT_OFFSET(4, pAputObject);
-  CallRuntimeHelperRegLocationRegLocationRegLocation(helper, rl_array, rl_index, rl_src, true);
+  if (Is64BitInstructionSet(cu_->instruction_set)) {
+    GenArrayObjPutCall<8>(this, needs_range_check, needs_null_check, rl_array, rl_index, rl_src);
+  } else {
+    GenArrayObjPutCall<4>(this, needs_range_check, needs_null_check, rl_array, rl_index, rl_src);
+  }
 }
 
 void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) {
@@ -845,8 +887,13 @@
                                                    type_idx)) {
     // Call out to helper which resolves type and verifies access.
     // Resolved type returned in kRet0.
-    CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
-                            type_idx, rl_method.reg, true);
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess),
+                              type_idx, rl_method.reg, true);
+    } else {
+      CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
+                              type_idx, rl_method.reg, true);
+    }
     RegLocation rl_result = GetReturn(false);
     StoreValue(rl_dest, rl_result);
   } else {
@@ -875,8 +922,13 @@
         void Compile() {
           GenerateTargetLabel();
 
-          m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx_,
-                                        rl_method_.reg, true);
+          if (Is64BitInstructionSet(cu_->instruction_set)) {
+            m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx_,
+                                          rl_method_.reg, true);
+          } else {
+            m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx_,
+                                          rl_method_.reg, true);
+          }
           m2l_->OpRegCopy(rl_result_.reg,  m2l_->TargetReg(kRet0));
 
           m2l_->OpUnconditionalBranch(cont_);
@@ -939,8 +991,13 @@
 
         void Compile() {
           GenerateTargetLabel();
-          m2l_->CallRuntimeHelperRegImm(QUICK_ENTRYPOINT_OFFSET(4, pResolveString),
-                                        r_method_, string_idx_, true);
+          if (Is64BitInstructionSet(cu_->instruction_set)) {
+            m2l_->CallRuntimeHelperRegImm(QUICK_ENTRYPOINT_OFFSET(8, pResolveString),
+                                          r_method_, string_idx_, true);
+          } else {
+            m2l_->CallRuntimeHelperRegImm(QUICK_ENTRYPOINT_OFFSET(4, pResolveString),
+                                          r_method_, string_idx_, true);
+          }
           m2l_->OpUnconditionalBranch(cont_);
         }
 
@@ -964,63 +1021,79 @@
   }
 }
 
-/*
- * Let helper function take care of everything.  Will
- * call Class::NewInstanceFromCode(type_idx, method);
- */
-void Mir2Lir::GenNewInstance(uint32_t type_idx, RegLocation rl_dest) {
-  FlushAllRegs();  /* Everything to home location */
+template <size_t pointer_size>
+static void GenNewInstanceImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, uint32_t type_idx,
+                               RegLocation rl_dest) {
+  mir_to_lir->FlushAllRegs();  /* Everything to home location */
   // alloc will always check for resolution, do we also need to verify
   // access because the verifier was unable to?
-  ThreadOffset<4> func_offset(-1);
-  const DexFile* dex_file = cu_->dex_file;
-  CompilerDriver* driver = cu_->compiler_driver;
+  ThreadOffset<pointer_size> func_offset(-1);
+  const DexFile* dex_file = cu->dex_file;
+  CompilerDriver* driver = cu->compiler_driver;
   if (driver->CanAccessInstantiableTypeWithoutChecks(
-      cu_->method_idx, *dex_file, type_idx)) {
+      cu->method_idx, *dex_file, type_idx)) {
     bool is_type_initialized;
     bool use_direct_type_ptr;
     uintptr_t direct_type_ptr;
+    bool is_finalizable;
     if (kEmbedClassInCode &&
-        driver->CanEmbedTypeInCode(*dex_file, type_idx,
-                                   &is_type_initialized, &use_direct_type_ptr, &direct_type_ptr)) {
+        driver->CanEmbedTypeInCode(*dex_file, type_idx, &is_type_initialized, &use_direct_type_ptr,
+                                   &direct_type_ptr, &is_finalizable) &&
+                                   !is_finalizable) {
       // The fast path.
       if (!use_direct_type_ptr) {
-        LoadClassType(type_idx, kArg0);
+        mir_to_lir->LoadClassType(type_idx, kArg0);
         if (!is_type_initialized) {
-          func_offset = QUICK_ENTRYPOINT_OFFSET(4, pAllocObjectResolved);
-          CallRuntimeHelperRegMethod(func_offset, TargetReg(kArg0), true);
+          func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocObjectResolved);
+          mir_to_lir->CallRuntimeHelperRegMethod(func_offset, mir_to_lir->TargetReg(kArg0), true);
         } else {
-          func_offset = QUICK_ENTRYPOINT_OFFSET(4, pAllocObjectInitialized);
-          CallRuntimeHelperRegMethod(func_offset, TargetReg(kArg0), true);
+          func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocObjectInitialized);
+          mir_to_lir->CallRuntimeHelperRegMethod(func_offset, mir_to_lir->TargetReg(kArg0), true);
         }
       } else {
         // Use the direct pointer.
         if (!is_type_initialized) {
-          func_offset = QUICK_ENTRYPOINT_OFFSET(4, pAllocObjectResolved);
-          CallRuntimeHelperImmMethod(func_offset, direct_type_ptr, true);
+          func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocObjectResolved);
+          mir_to_lir->CallRuntimeHelperImmMethod(func_offset, direct_type_ptr, true);
         } else {
-          func_offset = QUICK_ENTRYPOINT_OFFSET(4, pAllocObjectInitialized);
-          CallRuntimeHelperImmMethod(func_offset, direct_type_ptr, true);
+          func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocObjectInitialized);
+          mir_to_lir->CallRuntimeHelperImmMethod(func_offset, direct_type_ptr, true);
         }
       }
     } else {
       // The slow path.
       DCHECK_EQ(func_offset.Int32Value(), -1);
-      func_offset = QUICK_ENTRYPOINT_OFFSET(4, pAllocObject);
-      CallRuntimeHelperImmMethod(func_offset, type_idx, true);
+      func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocObject);
+      mir_to_lir->CallRuntimeHelperImmMethod(func_offset, type_idx, true);
     }
     DCHECK_NE(func_offset.Int32Value(), -1);
   } else {
-    func_offset = QUICK_ENTRYPOINT_OFFSET(4, pAllocObjectWithAccessCheck);
-    CallRuntimeHelperImmMethod(func_offset, type_idx, true);
+    func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocObjectWithAccessCheck);
+    mir_to_lir->CallRuntimeHelperImmMethod(func_offset, type_idx, true);
   }
-  RegLocation rl_result = GetReturn(false);
-  StoreValue(rl_dest, rl_result);
+  RegLocation rl_result = mir_to_lir->GetReturn(false);
+  mir_to_lir->StoreValue(rl_dest, rl_result);
+}
+
+/*
+ * Let helper function take care of everything.  Will
+ * call Class::NewInstanceFromCode(type_idx, method);
+ */
+void Mir2Lir::GenNewInstance(uint32_t type_idx, RegLocation rl_dest) {
+  if (Is64BitInstructionSet(cu_->instruction_set)) {
+    GenNewInstanceImpl<8>(this, cu_, type_idx, rl_dest);
+  } else {
+    GenNewInstanceImpl<4>(this, cu_, type_idx, rl_dest);
+  }
 }
 
 void Mir2Lir::GenThrow(RegLocation rl_src) {
   FlushAllRegs();
-  CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pDeliverException), rl_src, true);
+  if (Is64BitInstructionSet(cu_->instruction_set)) {
+    CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pDeliverException), rl_src, true);
+  } else {
+    CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pDeliverException), rl_src, true);
+  }
 }
 
 // For final classes there are no sub-classes to check and so we can answer the instance-of
@@ -1095,8 +1168,13 @@
   if (needs_access_check) {
     // Check we have access to type_idx and if not throw IllegalAccessError,
     // returns Class* in kArg0
-    CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
-                         type_idx, true);
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess),
+                           type_idx, true);
+    } else {
+      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
+                           type_idx, true);
+    }
     OpRegCopy(class_reg, TargetReg(kRet0));  // Align usage with fast path
     LoadValueDirectFixed(rl_src, TargetReg(kArg0));  // kArg0 <= ref
   } else if (use_declaring_class) {
@@ -1115,7 +1193,11 @@
       LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
       // Not resolved
       // Call out to helper, which will return resolved type in kRet0
-      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx, true);
+      } else {
+        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true);
+      }
       OpRegCopy(TargetReg(kArg2), TargetReg(kRet0));  // Align usage with fast path
       LoadValueDirectFixed(rl_src, TargetReg(kArg0));  /* reload Ref */
       // Rejoin code paths
@@ -1151,7 +1233,9 @@
     }
   } else {
     if (cu_->instruction_set == kThumb2) {
-      RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
+      RegStorage r_tgt = Is64BitInstructionSet(cu_->instruction_set) ?
+          LoadHelper(QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial)) :
+          LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
       LIR* it = nullptr;
       if (!type_known_abstract) {
       /* Uses conditional nullification */
@@ -1171,7 +1255,9 @@
         LoadConstant(rl_result.reg, 1);     // assume true
         branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL);
       }
-      RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
+      RegStorage r_tgt = Is64BitInstructionSet(cu_->instruction_set) ?
+          LoadHelper(QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial)) :
+          LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
       OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));    // .ne case - arg0 <= class
       OpReg(kOpBlx, r_tgt);    // .ne case: helper(class, ref->class)
       FreeTemp(r_tgt);
@@ -1232,8 +1318,13 @@
     // Check we have access to type_idx and if not throw IllegalAccessError,
     // returns Class* in kRet0
     // InitializeTypeAndVerifyAccess(idx, method)
-    CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
-                            type_idx, TargetReg(kArg1), true);
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess),
+                              type_idx, TargetReg(kArg1), true);
+    } else {
+      CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
+                              type_idx, TargetReg(kArg1), true);
+    }
     OpRegCopy(class_reg, TargetReg(kRet0));  // Align usage with fast path
   } else if (use_declaring_class) {
     LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
@@ -1263,11 +1354,17 @@
 
           // Call out to helper, which will return resolved type in kArg0
           // InitializeTypeFromCode(idx, method)
-          m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx_,
-                                        m2l_->TargetReg(kArg1), true);
+          if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) {
+            m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx_,
+                                          m2l_->TargetReg(kArg1), true);
+          } else {
+            m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx_,
+                                          m2l_->TargetReg(kArg1), true);
+          }
           m2l_->OpRegCopy(class_reg_, m2l_->TargetReg(kRet0));  // Align usage with fast path
           m2l_->OpUnconditionalBranch(cont_);
         }
+
        public:
         const int type_idx_;
         const RegStorage class_reg_;
@@ -1294,8 +1391,13 @@
         m2l_->LoadRefDisp(m2l_->TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(),
                           m2l_->TargetReg(kArg1));
       }
-      m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pCheckCast), m2l_->TargetReg(kArg2),
-                                    m2l_->TargetReg(kArg1), true);
+      if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pCheckCast), m2l_->TargetReg(kArg2),
+                                      m2l_->TargetReg(kArg1), true);
+      } else {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pCheckCast), m2l_->TargetReg(kArg2),
+                                      m2l_->TargetReg(kArg1), true);
+      }
 
       m2l_->OpUnconditionalBranch(cont_);
     }
@@ -1377,28 +1479,38 @@
 }
 
 
-void Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                             RegLocation rl_src1, RegLocation rl_shift) {
-  ThreadOffset<4> func_offset(-1);
+template <size_t pointer_size>
+static void GenShiftOpLongCall(Mir2Lir* mir_to_lir, Instruction::Code opcode, RegLocation rl_src1,
+                               RegLocation rl_shift) {
+  ThreadOffset<pointer_size> func_offset(-1);
 
   switch (opcode) {
     case Instruction::SHL_LONG:
     case Instruction::SHL_LONG_2ADDR:
-      func_offset = QUICK_ENTRYPOINT_OFFSET(4, pShlLong);
+      func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pShlLong);
       break;
     case Instruction::SHR_LONG:
     case Instruction::SHR_LONG_2ADDR:
-      func_offset = QUICK_ENTRYPOINT_OFFSET(4, pShrLong);
+      func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pShrLong);
       break;
     case Instruction::USHR_LONG:
     case Instruction::USHR_LONG_2ADDR:
-      func_offset = QUICK_ENTRYPOINT_OFFSET(4, pUshrLong);
+      func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pUshrLong);
       break;
     default:
       LOG(FATAL) << "Unexpected case";
   }
-  FlushAllRegs();   /* Send everything to home location */
-  CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_shift, false);
+  mir_to_lir->FlushAllRegs();   /* Send everything to home location */
+  mir_to_lir->CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_shift, false);
+}
+
+void Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_shift) {
+  if (Is64BitInstructionSet(cu_->instruction_set)) {
+    GenShiftOpLongCall<8>(this, opcode, rl_src1, rl_shift);
+  } else {
+    GenShiftOpLongCall<4>(this, opcode, rl_src1, rl_shift);
+  }
   RegLocation rl_result = GetReturnWide(false);
   StoreValueWide(rl_dest, rl_result);
 }
@@ -1483,7 +1595,7 @@
       rl_result = EvalLoc(rl_dest, kCoreReg, true);
       OpRegReg(op, rl_result.reg, rl_src1.reg);
     } else {
-      if (shift_op) {
+      if ((shift_op) && (cu_->instruction_set != kArm64)) {
         rl_src2 = LoadValue(rl_src2, kCoreReg);
         RegStorage t_reg = AllocTemp();
         OpRegRegImm(kOpAnd, t_reg, rl_src2.reg, 31);
@@ -1501,7 +1613,7 @@
     StoreValue(rl_dest, rl_result);
   } else {
     bool done = false;      // Set to true if we happen to find a way to use a real instruction.
-    if (cu_->instruction_set == kMips) {
+    if (cu_->instruction_set == kMips || cu_->instruction_set == kArm64) {
       rl_src1 = LoadValue(rl_src1, kCoreReg);
       rl_src2 = LoadValue(rl_src2, kCoreReg);
       if (check_zero) {
@@ -1525,16 +1637,21 @@
 
     // If we haven't already generated the code use the callout function.
     if (!done) {
-      ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pIdivmod);
       FlushAllRegs();   /* Send everything to home location */
       LoadValueDirectFixed(rl_src2, TargetReg(kArg1));
-      RegStorage r_tgt = CallHelperSetup(func_offset);
+      RegStorage r_tgt = Is64BitInstructionSet(cu_->instruction_set) ?
+          CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(8, pIdivmod)) :
+          CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(4, pIdivmod));
       LoadValueDirectFixed(rl_src1, TargetReg(kArg0));
       if (check_zero) {
         GenDivZeroCheck(TargetReg(kArg1));
       }
       // NOTE: callout here is not a safepoint.
-      CallHelper(r_tgt, func_offset, false /* not a safepoint */);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        CallHelper(r_tgt, QUICK_ENTRYPOINT_OFFSET(8, pIdivmod), false /* not a safepoint */);
+      } else {
+        CallHelper(r_tgt, QUICK_ENTRYPOINT_OFFSET(4, pIdivmod), false /* not a safepoint */);
+      }
       if (op == kOpDiv)
         rl_result = GetReturn(false);
       else
@@ -1772,7 +1889,7 @@
       }
 
       bool done = false;
-      if (cu_->instruction_set == kMips) {
+      if (cu_->instruction_set == kMips || cu_->instruction_set == kArm64) {
         rl_src = LoadValue(rl_src, kCoreReg);
         rl_result = GenDivRemLit(rl_dest, rl_src.reg, lit, is_div);
         done = true;
@@ -1793,8 +1910,13 @@
         FlushAllRegs();   /* Everything to home location. */
         LoadValueDirectFixed(rl_src, TargetReg(kArg0));
         Clobber(TargetReg(kArg0));
-        ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pIdivmod);
-        CallRuntimeHelperRegImm(func_offset, TargetReg(kArg0), lit, false);
+        if (Is64BitInstructionSet(cu_->instruction_set)) {
+          CallRuntimeHelperRegImm(QUICK_ENTRYPOINT_OFFSET(8, pIdivmod), TargetReg(kArg0), lit,
+                                  false);
+        } else {
+          CallRuntimeHelperRegImm(QUICK_ENTRYPOINT_OFFSET(4, pIdivmod), TargetReg(kArg0), lit,
+                                  false);
+        }
         if (is_div)
           rl_result = GetReturn(false);
         else
@@ -1817,37 +1939,42 @@
   StoreValue(rl_dest, rl_result);
 }
 
-void Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                             RegLocation rl_src1, RegLocation rl_src2) {
+template <size_t pointer_size>
+static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruction::Code opcode,
+                               RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
   RegLocation rl_result;
   OpKind first_op = kOpBkpt;
   OpKind second_op = kOpBkpt;
   bool call_out = false;
   bool check_zero = false;
-  ThreadOffset<4> func_offset(-1);
-  int ret_reg = TargetReg(kRet0).GetReg();
+  ThreadOffset<pointer_size> func_offset(-1);
+  int ret_reg = mir_to_lir->TargetReg(kRet0).GetReg();
 
   switch (opcode) {
     case Instruction::NOT_LONG:
-      rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-      rl_result = EvalLoc(rl_dest, kCoreReg, true);
+      if (cu->instruction_set == kArm64) {
+        mir_to_lir->GenNotLong(rl_dest, rl_src2);
+        return;
+      }
+      rl_src2 = mir_to_lir->LoadValueWide(rl_src2, kCoreReg);
+      rl_result = mir_to_lir->EvalLoc(rl_dest, kCoreReg, true);
       // Check for destructive overlap
       if (rl_result.reg.GetLowReg() == rl_src2.reg.GetHighReg()) {
-        RegStorage t_reg = AllocTemp();
-        OpRegCopy(t_reg, rl_src2.reg.GetHigh());
-        OpRegReg(kOpMvn, rl_result.reg.GetLow(), rl_src2.reg.GetLow());
-        OpRegReg(kOpMvn, rl_result.reg.GetHigh(), t_reg);
-        FreeTemp(t_reg);
+        RegStorage t_reg = mir_to_lir->AllocTemp();
+        mir_to_lir->OpRegCopy(t_reg, rl_src2.reg.GetHigh());
+        mir_to_lir->OpRegReg(kOpMvn, rl_result.reg.GetLow(), rl_src2.reg.GetLow());
+        mir_to_lir->OpRegReg(kOpMvn, rl_result.reg.GetHigh(), t_reg);
+        mir_to_lir->FreeTemp(t_reg);
       } else {
-        OpRegReg(kOpMvn, rl_result.reg.GetLow(), rl_src2.reg.GetLow());
-        OpRegReg(kOpMvn, rl_result.reg.GetHigh(), rl_src2.reg.GetHigh());
+        mir_to_lir->OpRegReg(kOpMvn, rl_result.reg.GetLow(), rl_src2.reg.GetLow());
+        mir_to_lir->OpRegReg(kOpMvn, rl_result.reg.GetHigh(), rl_src2.reg.GetHigh());
       }
-      StoreValueWide(rl_dest, rl_result);
+      mir_to_lir->StoreValueWide(rl_dest, rl_result);
       return;
     case Instruction::ADD_LONG:
     case Instruction::ADD_LONG_2ADDR:
-      if (cu_->instruction_set != kThumb2) {
-        GenAddLong(opcode, rl_dest, rl_src1, rl_src2);
+      if (cu->instruction_set != kThumb2) {
+        mir_to_lir->GenAddLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpAdd;
@@ -1855,8 +1982,8 @@
       break;
     case Instruction::SUB_LONG:
     case Instruction::SUB_LONG_2ADDR:
-      if (cu_->instruction_set != kThumb2) {
-        GenSubLong(opcode, rl_dest, rl_src1, rl_src2);
+      if (cu->instruction_set != kThumb2) {
+        mir_to_lir->GenSubLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpSub;
@@ -1864,42 +1991,53 @@
       break;
     case Instruction::MUL_LONG:
     case Instruction::MUL_LONG_2ADDR:
-      if (cu_->instruction_set != kMips) {
-        GenMulLong(opcode, rl_dest, rl_src1, rl_src2);
+      if (cu->instruction_set != kMips) {
+        mir_to_lir->GenMulLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       } else {
         call_out = true;
-        ret_reg = TargetReg(kRet0).GetReg();
-        func_offset = QUICK_ENTRYPOINT_OFFSET(4, pLmul);
+        ret_reg = mir_to_lir->TargetReg(kRet0).GetReg();
+        func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pLmul);
       }
       break;
     case Instruction::DIV_LONG:
     case Instruction::DIV_LONG_2ADDR:
+      if (cu->instruction_set == kArm64) {
+        mir_to_lir->GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true);
+        return;
+      }
       call_out = true;
       check_zero = true;
-      ret_reg = TargetReg(kRet0).GetReg();
-      func_offset = QUICK_ENTRYPOINT_OFFSET(4, pLdiv);
+      ret_reg = mir_to_lir->TargetReg(kRet0).GetReg();
+      func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pLdiv);
       break;
     case Instruction::REM_LONG:
     case Instruction::REM_LONG_2ADDR:
+      if (cu->instruction_set == kArm64) {
+        mir_to_lir->GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false);
+        return;
+      }
       call_out = true;
       check_zero = true;
-      func_offset = QUICK_ENTRYPOINT_OFFSET(4, pLmod);
+      func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pLmod);
       /* NOTE - for Arm, result is in kArg2/kArg3 instead of kRet0/kRet1 */
-      ret_reg = (cu_->instruction_set == kThumb2) ? TargetReg(kArg2).GetReg() : TargetReg(kRet0).GetReg();
+      ret_reg = (cu->instruction_set == kThumb2) ? mir_to_lir->TargetReg(kArg2).GetReg() :
+          mir_to_lir->TargetReg(kRet0).GetReg();
       break;
     case Instruction::AND_LONG_2ADDR:
     case Instruction::AND_LONG:
-      if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
-        return GenAndLong(opcode, rl_dest, rl_src1, rl_src2);
+      if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64 ||
+          cu->instruction_set == kArm64) {
+        return mir_to_lir->GenAndLong(opcode, rl_dest, rl_src1, rl_src2);
       }
       first_op = kOpAnd;
       second_op = kOpAnd;
       break;
     case Instruction::OR_LONG:
     case Instruction::OR_LONG_2ADDR:
-      if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
-        GenOrLong(opcode, rl_dest, rl_src1, rl_src2);
+      if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64 ||
+          cu->instruction_set == kArm64) {
+        mir_to_lir->GenOrLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpOr;
@@ -1907,51 +2045,76 @@
       break;
     case Instruction::XOR_LONG:
     case Instruction::XOR_LONG_2ADDR:
-      if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
-        GenXorLong(opcode, rl_dest, rl_src1, rl_src2);
+      if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64 ||
+          cu->instruction_set == kArm64) {
+        mir_to_lir->GenXorLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpXor;
       second_op = kOpXor;
       break;
     case Instruction::NEG_LONG: {
-      GenNegLong(rl_dest, rl_src2);
+      mir_to_lir->GenNegLong(rl_dest, rl_src2);
       return;
     }
     default:
       LOG(FATAL) << "Invalid long arith op";
   }
   if (!call_out) {
-    GenLong3Addr(first_op, second_op, rl_dest, rl_src1, rl_src2);
+    mir_to_lir->GenLong3Addr(first_op, second_op, rl_dest, rl_src1, rl_src2);
   } else {
-    FlushAllRegs();   /* Send everything to home location */
+    mir_to_lir->FlushAllRegs();   /* Send everything to home location */
     if (check_zero) {
-      RegStorage r_tmp1 = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
-      RegStorage r_tmp2 = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
-      LoadValueDirectWideFixed(rl_src2, r_tmp2);
-      RegStorage r_tgt = CallHelperSetup(func_offset);
-      GenDivZeroCheckWide(RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3)));
-      LoadValueDirectWideFixed(rl_src1, r_tmp1);
+      RegStorage r_tmp1 = RegStorage::MakeRegPair(mir_to_lir->TargetReg(kArg0),
+                                                  mir_to_lir->TargetReg(kArg1));
+      RegStorage r_tmp2 = RegStorage::MakeRegPair(mir_to_lir->TargetReg(kArg2),
+                                                  mir_to_lir->TargetReg(kArg3));
+      mir_to_lir->LoadValueDirectWideFixed(rl_src2, r_tmp2);
+      RegStorage r_tgt = mir_to_lir->CallHelperSetup(func_offset);
+      mir_to_lir->GenDivZeroCheckWide(RegStorage::MakeRegPair(mir_to_lir->TargetReg(kArg2),
+                                                              mir_to_lir->TargetReg(kArg3)));
+      mir_to_lir->LoadValueDirectWideFixed(rl_src1, r_tmp1);
       // NOTE: callout here is not a safepoint
-      CallHelper(r_tgt, func_offset, false /* not safepoint */);
+      mir_to_lir->CallHelper(r_tgt, func_offset, false /* not safepoint */);
     } else {
-      CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false);
+      mir_to_lir->CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false);
     }
     // Adjust return regs in to handle case of rem returning kArg2/kArg3
-    if (ret_reg == TargetReg(kRet0).GetReg())
-      rl_result = GetReturnWide(false);
+    if (ret_reg == mir_to_lir->TargetReg(kRet0).GetReg())
+      rl_result = mir_to_lir->GetReturnWide(false);
     else
-      rl_result = GetReturnWideAlt();
-    StoreValueWide(rl_dest, rl_result);
+      rl_result = mir_to_lir->GetReturnWideAlt();
+    mir_to_lir->StoreValueWide(rl_dest, rl_result);
   }
 }
 
-void Mir2Lir::GenConversionCall(ThreadOffset<4> func_offset,
+void Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_src2) {
+  if (Is64BitInstructionSet(cu_->instruction_set)) {
+    GenArithOpLongImpl<8>(this, cu_, opcode, rl_dest, rl_src1, rl_src2);
+  } else {
+    GenArithOpLongImpl<4>(this, cu_, opcode, rl_dest, rl_src1, rl_src2);
+  }
+}
+
+void Mir2Lir::GenConst(RegLocation rl_dest, int value) {
+  RegLocation rl_result = EvalLoc(rl_dest, kAnyReg, true);
+  LoadConstantNoClobber(rl_result.reg, value);
+  StoreValue(rl_dest, rl_result);
+  if (value == 0) {
+    Workaround7250540(rl_dest, rl_result.reg);
+  }
+}
+
+template <size_t pointer_size>
+void Mir2Lir::GenConversionCall(ThreadOffset<pointer_size> func_offset,
                                 RegLocation rl_dest, RegLocation rl_src) {
   /*
    * Don't optimize the register usage since it calls out to support
    * functions
    */
+  DCHECK_EQ(pointer_size, GetInstructionSetPointerSize(cu_->instruction_set));
+
   FlushAllRegs();   /* Send everything to home location */
   CallRuntimeHelperRegLocation(func_offset, rl_src, false);
   if (rl_dest.wide) {
@@ -1964,6 +2127,10 @@
     StoreValue(rl_dest, rl_result);
   }
 }
+template void Mir2Lir::GenConversionCall(ThreadOffset<4> func_offset,
+                                         RegLocation rl_dest, RegLocation rl_src);
+template void Mir2Lir::GenConversionCall(ThreadOffset<8> func_offset,
+                                         RegLocation rl_dest, RegLocation rl_src);
 
 class SuspendCheckSlowPath : public Mir2Lir::LIRSlowPath {
  public:
@@ -1975,7 +2142,11 @@
     m2l_->ResetRegPool();
     m2l_->ResetDefTracking();
     GenerateTargetLabel(kPseudoSuspendTarget);
-    m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(4, pTestSuspend), true);
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(8, pTestSuspend), true);
+    } else {
+      m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(4, pTestSuspend), true);
+    }
     if (cont_ != nullptr) {
       m2l_->OpUnconditionalBranch(cont_);
     }
@@ -2030,13 +2201,21 @@
 /* Call out to helper assembly routine that will null check obj and then lock it. */
 void Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
   FlushAllRegs();
-  CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pLockObject), rl_src, true);
+  if (Is64BitInstructionSet(cu_->instruction_set)) {
+    CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pLockObject), rl_src, true);
+  } else {
+    CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pLockObject), rl_src, true);
+  }
 }
 
 /* Call out to helper assembly routine that will null check obj and then unlock it. */
 void Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
   FlushAllRegs();
-  CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject), rl_src, true);
+  if (Is64BitInstructionSet(cu_->instruction_set)) {
+    CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject), rl_src, true);
+  } else {
+    CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject), rl_src, true);
+  }
 }
 
 /* Generic code for generating a wide constant into a VR. */
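
The gen_common.cc changes above lean on one recurring pattern: the shared implementation is templated on the thread pointer size (GenArithOpLongImpl<4>/<8>), and callers pick the instantiation, together with the matching QUICK_ENTRYPOINT_OFFSET width, by branching on Is64BitInstructionSet. A minimal standalone sketch of that shape follows; ThreadOffsetSketch, EntrypointOffset, CallLockObjectImpl and the slot numbers are invented stand-ins for illustration, not ART APIs.

    // Sketch of the pointer-size dispatch pattern (assumed names, not ART's).
    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    template <size_t kPointerSize>
    struct ThreadOffsetSketch {
      explicit ThreadOffsetSketch(int32_t v) : value(v) {}
      int32_t value;
    };

    // Hypothetical "entrypoint table slot" -> byte offset for a given pointer size.
    template <size_t kPointerSize>
    ThreadOffsetSketch<kPointerSize> EntrypointOffset(int slot) {
      return ThreadOffsetSketch<kPointerSize>(slot * static_cast<int32_t>(kPointerSize));
    }

    // Shared implementation, templated on pointer size like GenArithOpLongImpl<4/8>.
    template <size_t kPointerSize>
    void CallLockObjectImpl(int slot) {
      ThreadOffsetSketch<kPointerSize> off = EntrypointOffset<kPointerSize>(slot);
      std::cout << "calling helper at Thread offset " << off.value
                << " (pointer size " << kPointerSize << ")\n";
    }

    // Runtime dispatch, analogous to the Is64BitInstructionSet branches above.
    void CallLockObject(bool is_64bit_target, int slot) {
      if (is_64bit_target) {
        CallLockObjectImpl<8>(slot);
      } else {
        CallLockObjectImpl<4>(slot);
      }
    }

    int main() {
      CallLockObject(false, 3);  // 32-bit target: offset 12
      CallLockObject(true, 3);   // 64-bit target: offset 24
    }
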
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 93a23a6..5ec1ca9 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -22,12 +22,16 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "invoke_type.h"
 #include "mirror/array.h"
+#include "mirror/object_array-inl.h"
 #include "mirror/string.h"
 #include "mir_to_lir-inl.h"
 #include "x86/codegen_x86.h"
 
 namespace art {
 
+// Shortcuts to repeatedly used long types.
+typedef mirror::ObjectArray<mirror::Object> ObjArray;
+
 /*
  * This source file contains "gen" codegen routines that should
  * be applicable to most targets.  Only mid-level support utilities
@@ -59,19 +63,46 @@
   AddSlowPath(new (arena_) IntrinsicSlowPathPath(this, info, branch, resume));
 }
 
+// Macro to help instantiate.
+// TODO: This might be used to only instantiate <4> on pure 32b systems.
+#define INSTANTIATE(sig_part1, ...) \
+  template sig_part1(ThreadOffset<4>, __VA_ARGS__); \
+  template sig_part1(ThreadOffset<8>, __VA_ARGS__); \
+
+
 /*
  * To save scheduling time, helper calls are broken into two parts: generation of
  * the helper target address, and the actual call to the helper.  Because x86
  * has a memory call operation, part 1 is a NOP for x86.  For other targets,
  * load arguments between the two parts.
  */
+// template <size_t pointer_size>
 RegStorage Mir2Lir::CallHelperSetup(ThreadOffset<4> helper_offset) {
-  return (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) ? RegStorage::InvalidReg() : LoadHelper(helper_offset);
+  // All CallRuntimeHelperXXX call this first. So make a central check here.
+  DCHECK_EQ(4U, GetInstructionSetPointerSize(cu_->instruction_set));
+
+  if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
+    return RegStorage::InvalidReg();
+  } else {
+    return LoadHelper(helper_offset);
+  }
+}
+
+RegStorage Mir2Lir::CallHelperSetup(ThreadOffset<8> helper_offset) {
+  // All CallRuntimeHelperXXX call this first. So make a central check here.
+  DCHECK_EQ(8U, GetInstructionSetPointerSize(cu_->instruction_set));
+
+  if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
+    return RegStorage::InvalidReg();
+  } else {
+    return LoadHelper(helper_offset);
+  }
 }
 
 /* NOTE: if r_tgt is a temp, it will be freed following use */
-LIR* Mir2Lir::CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
-                         bool use_link) {
+template <size_t pointer_size>
+LIR* Mir2Lir::CallHelper(RegStorage r_tgt, ThreadOffset<pointer_size> helper_offset,
+                         bool safepoint_pc, bool use_link) {
   LIR* call_inst;
   OpKind op = use_link ? kOpBlx : kOpBx;
   if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
@@ -85,30 +116,41 @@
   }
   return call_inst;
 }
+template LIR* Mir2Lir::CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset,
+                                        bool safepoint_pc, bool use_link);
+template LIR* Mir2Lir::CallHelper(RegStorage r_tgt, ThreadOffset<8> helper_offset,
+                                        bool safepoint_pc, bool use_link);
 
-void Mir2Lir::CallRuntimeHelper(ThreadOffset<4> helper_offset, bool safepoint_pc) {
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelper(ThreadOffset<pointer_size> helper_offset, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelper, bool safepoint_pc)
 
-void Mir2Lir::CallRuntimeHelperImm(ThreadOffset<4> helper_offset, int arg0, bool safepoint_pc) {
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperImm(ThreadOffset<pointer_size> helper_offset, int arg0, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   LoadConstant(TargetReg(kArg0), arg0);
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperImm, int arg0, bool safepoint_pc)
 
-void Mir2Lir::CallRuntimeHelperReg(ThreadOffset<4> helper_offset, RegStorage arg0,
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperReg(ThreadOffset<pointer_size> helper_offset, RegStorage arg0,
                                    bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   OpRegCopy(TargetReg(kArg0), arg0);
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperReg, RegStorage arg0, bool safepoint_pc)
 
-void Mir2Lir::CallRuntimeHelperRegLocation(ThreadOffset<4> helper_offset, RegLocation arg0,
-                                           bool safepoint_pc) {
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperRegLocation(ThreadOffset<pointer_size> helper_offset,
+                                           RegLocation arg0, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   if (arg0.wide == 0) {
     LoadValueDirectFixed(arg0, TargetReg(kArg0));
@@ -117,19 +159,23 @@
     LoadValueDirectWideFixed(arg0, r_tmp);
   }
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperRegLocation, RegLocation arg0, bool safepoint_pc)
 
-void Mir2Lir::CallRuntimeHelperImmImm(ThreadOffset<4> helper_offset, int arg0, int arg1,
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperImmImm(ThreadOffset<pointer_size> helper_offset, int arg0, int arg1,
                                       bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   LoadConstant(TargetReg(kArg0), arg0);
   LoadConstant(TargetReg(kArg1), arg1);
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperImmImm, int arg0, int arg1, bool safepoint_pc)
 
-void Mir2Lir::CallRuntimeHelperImmRegLocation(ThreadOffset<4> helper_offset, int arg0,
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperImmRegLocation(ThreadOffset<pointer_size> helper_offset, int arg0,
                                               RegLocation arg1, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   if (arg1.wide == 0) {
@@ -140,46 +186,58 @@
   }
   LoadConstant(TargetReg(kArg0), arg0);
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperImmRegLocation, int arg0, RegLocation arg1,
+            bool safepoint_pc)
 
-void Mir2Lir::CallRuntimeHelperRegLocationImm(ThreadOffset<4> helper_offset, RegLocation arg0,
-                                              int arg1, bool safepoint_pc) {
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperRegLocationImm(ThreadOffset<pointer_size> helper_offset,
+                                              RegLocation arg0, int arg1, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   LoadValueDirectFixed(arg0, TargetReg(kArg0));
   LoadConstant(TargetReg(kArg1), arg1);
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperRegLocationImm, RegLocation arg0, int arg1,
+            bool safepoint_pc)
 
-void Mir2Lir::CallRuntimeHelperImmReg(ThreadOffset<4> helper_offset, int arg0, RegStorage arg1,
-                                      bool safepoint_pc) {
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperImmReg(ThreadOffset<pointer_size> helper_offset, int arg0,
+                                      RegStorage arg1, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   OpRegCopy(TargetReg(kArg1), arg1);
   LoadConstant(TargetReg(kArg0), arg0);
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperImmReg, int arg0, RegStorage arg1, bool safepoint_pc)
 
-void Mir2Lir::CallRuntimeHelperRegImm(ThreadOffset<4> helper_offset, RegStorage arg0, int arg1,
-                                      bool safepoint_pc) {
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperRegImm(ThreadOffset<pointer_size> helper_offset, RegStorage arg0,
+                                      int arg1, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   OpRegCopy(TargetReg(kArg0), arg0);
   LoadConstant(TargetReg(kArg1), arg1);
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperRegImm, RegStorage arg0, int arg1, bool safepoint_pc)
 
-void Mir2Lir::CallRuntimeHelperImmMethod(ThreadOffset<4> helper_offset, int arg0,
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperImmMethod(ThreadOffset<pointer_size> helper_offset, int arg0,
                                          bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   LoadCurrMethodDirect(TargetReg(kArg1));
   LoadConstant(TargetReg(kArg0), arg0);
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperImmMethod, int arg0, bool safepoint_pc)
 
-void Mir2Lir::CallRuntimeHelperRegMethod(ThreadOffset<4> helper_offset, RegStorage arg0,
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperRegMethod(ThreadOffset<pointer_size> helper_offset, RegStorage arg0,
                                          bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   DCHECK_NE(TargetReg(kArg1).GetReg(), arg0.GetReg());
@@ -188,11 +246,14 @@
   }
   LoadCurrMethodDirect(TargetReg(kArg1));
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperRegMethod, RegStorage arg0, bool safepoint_pc)
 
-void Mir2Lir::CallRuntimeHelperRegMethodRegLocation(ThreadOffset<4> helper_offset, RegStorage arg0,
-                                                    RegLocation arg2, bool safepoint_pc) {
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperRegMethodRegLocation(ThreadOffset<pointer_size> helper_offset,
+                                                    RegStorage arg0, RegLocation arg2,
+                                                    bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   DCHECK_NE(TargetReg(kArg1).GetReg(), arg0.GetReg());
   if (TargetReg(kArg0) != arg0) {
@@ -201,10 +262,13 @@
   LoadCurrMethodDirect(TargetReg(kArg1));
   LoadValueDirectFixed(arg2, TargetReg(kArg2));
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperRegMethodRegLocation, RegStorage arg0, RegLocation arg2,
+            bool safepoint_pc)
 
-void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(ThreadOffset<4> helper_offset,
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(ThreadOffset<pointer_size> helper_offset,
                                                       RegLocation arg0, RegLocation arg1,
                                                       bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
@@ -251,8 +315,10 @@
     }
   }
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperRegLocationRegLocation, RegLocation arg0,
+            RegLocation arg1, bool safepoint_pc)
 
 void Mir2Lir::CopyToArgumentRegs(RegStorage arg0, RegStorage arg1) {
   if (arg1.GetReg() == TargetReg(kArg0).GetReg()) {
@@ -271,48 +337,61 @@
   }
 }
 
-void Mir2Lir::CallRuntimeHelperRegReg(ThreadOffset<4> helper_offset, RegStorage arg0,
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperRegReg(ThreadOffset<pointer_size> helper_offset, RegStorage arg0,
                                       RegStorage arg1, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   CopyToArgumentRegs(arg0, arg1);
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperRegReg, RegStorage arg0, RegStorage arg1,
+            bool safepoint_pc)
 
-void Mir2Lir::CallRuntimeHelperRegRegImm(ThreadOffset<4> helper_offset, RegStorage arg0,
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperRegRegImm(ThreadOffset<pointer_size> helper_offset, RegStorage arg0,
                                          RegStorage arg1, int arg2, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   CopyToArgumentRegs(arg0, arg1);
   LoadConstant(TargetReg(kArg2), arg2);
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperRegRegImm, RegStorage arg0, RegStorage arg1, int arg2,
+            bool safepoint_pc)
 
-void Mir2Lir::CallRuntimeHelperImmMethodRegLocation(ThreadOffset<4> helper_offset,
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperImmMethodRegLocation(ThreadOffset<pointer_size> helper_offset,
                                                     int arg0, RegLocation arg2, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   LoadValueDirectFixed(arg2, TargetReg(kArg2));
   LoadCurrMethodDirect(TargetReg(kArg1));
   LoadConstant(TargetReg(kArg0), arg0);
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperImmMethodRegLocation, int arg0, RegLocation arg2,
+            bool safepoint_pc)
 
-void Mir2Lir::CallRuntimeHelperImmMethodImm(ThreadOffset<4> helper_offset, int arg0,
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperImmMethodImm(ThreadOffset<pointer_size> helper_offset, int arg0,
                                             int arg2, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   LoadCurrMethodDirect(TargetReg(kArg1));
   LoadConstant(TargetReg(kArg2), arg2);
   LoadConstant(TargetReg(kArg0), arg0);
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperImmMethodImm, int arg0, int arg2, bool safepoint_pc)
 
-void Mir2Lir::CallRuntimeHelperImmRegLocationRegLocation(ThreadOffset<4> helper_offset,
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperImmRegLocationRegLocation(ThreadOffset<pointer_size> helper_offset,
                                                          int arg0, RegLocation arg1,
                                                          RegLocation arg2, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  DCHECK_EQ(arg1.wide, 0U);
+  DCHECK_EQ(static_cast<unsigned int>(arg1.wide), 0U);  // The static_cast works around an
+                                                        // instantiation bug in GCC.
   LoadValueDirectFixed(arg1, TargetReg(kArg1));
   if (arg2.wide == 0) {
     LoadValueDirectFixed(arg2, TargetReg(kArg2));
@@ -322,27 +401,32 @@
   }
   LoadConstant(TargetReg(kArg0), arg0);
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperImmRegLocationRegLocation, int arg0, RegLocation arg1,
+            RegLocation arg2, bool safepoint_pc)
 
-void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocation(ThreadOffset<4> helper_offset,
+template <size_t pointer_size>
+void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocation(ThreadOffset<pointer_size> helper_offset,
                                                                  RegLocation arg0, RegLocation arg1,
                                                                  RegLocation arg2,
                                                                  bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  DCHECK_EQ(arg0.wide, 0U);
+  DCHECK_EQ(static_cast<unsigned int>(arg0.wide), 0U);
   LoadValueDirectFixed(arg0, TargetReg(kArg0));
-  DCHECK_EQ(arg1.wide, 0U);
+  DCHECK_EQ(static_cast<unsigned int>(arg1.wide), 0U);
   LoadValueDirectFixed(arg1, TargetReg(kArg1));
-  DCHECK_EQ(arg1.wide, 0U);
+  DCHECK_EQ(static_cast<unsigned int>(arg1.wide), 0U);
   LoadValueDirectFixed(arg2, TargetReg(kArg2));
   ClobberCallerSave();
-  CallHelper(r_tgt, helper_offset, safepoint_pc);
+  CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
+INSTANTIATE(void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocation, RegLocation arg0,
+            RegLocation arg1, RegLocation arg2, bool safepoint_pc)
 
 /*
  * If there are any ins passed in registers that have not been promoted
- * to a callee-save register, flush them to the frame.  Perform intial
+ * to a callee-save register, flush them to the frame.  Perform initial
  * assignment of promoted arguments.
  *
  * ArgLocs is an array of location records describing the incoming arguments
@@ -358,7 +442,7 @@
   rl_src.location = kLocPhysReg;
   rl_src.reg = TargetReg(kArg0);
   rl_src.home = false;
-  MarkLive(rl_src.reg, rl_src.s_reg_low);
+  MarkLive(rl_src);
   if (rl_method.wide) {
     StoreValueWide(rl_method, rl_src);
   } else {
@@ -494,8 +578,8 @@
     case 2:  // Grab target method*
       CHECK_EQ(cu->dex_file, target_method.dex_file);
       cg->LoadRefDisp(cg->TargetReg(kArg0),
-                      mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
-                      (target_method.dex_method_index * 4), cg->TargetReg(kArg0));
+                      ObjArray::OffsetOfElement(target_method.dex_method_index).Int32Value(),
+                      cg->TargetReg(kArg0));
       break;
     case 3:  // Grab the code from the method*
       if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
@@ -548,8 +632,8 @@
                       cg->TargetReg(kInvokeTgt));
       break;
     case 3:  // Get target method [use kInvokeTgt, set kArg0]
-      cg->LoadRefDisp(cg->TargetReg(kInvokeTgt), (method_idx * 4) +
-                      mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value(),
+      cg->LoadRefDisp(cg->TargetReg(kInvokeTgt),
+                      ObjArray::OffsetOfElement(method_idx).Int32Value(),
                       cg->TargetReg(kArg0));
       break;
     case 4:  // Get the compiled code address [uses kArg0, sets kInvokeTgt]
@@ -605,8 +689,8 @@
       break;
     case 4:  // Get target method [use kInvokeTgt, set kArg0]
       // NOTE: native pointer.
-      cg->LoadWordDisp(cg->TargetReg(kInvokeTgt), ((method_idx % ClassLinker::kImtSize) * 4) +
-                       mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value(),
+      cg->LoadRefDisp(cg->TargetReg(kInvokeTgt),
+                       ObjArray::OffsetOfElement(method_idx % ClassLinker::kImtSize).Int32Value(),
                        cg->TargetReg(kArg0));
       break;
     case 5:  // Get the compiled code address [use kArg0, set kInvokeTgt]
@@ -623,7 +707,8 @@
   return state + 1;
 }
 
-static int NextInvokeInsnSP(CompilationUnit* cu, CallInfo* info, ThreadOffset<4> trampoline,
+template <size_t pointer_size>
+static int NextInvokeInsnSP(CompilationUnit* cu, CallInfo* info, ThreadOffset<pointer_size> trampoline,
                             int state, const MethodReference& target_method,
                             uint32_t method_idx) {
   Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
@@ -649,32 +734,52 @@
                                 const MethodReference& target_method,
                                 uint32_t unused, uintptr_t unused2,
                                 uintptr_t unused3, InvokeType unused4) {
-  ThreadOffset<4> trampoline = QUICK_ENTRYPOINT_OFFSET(4, pInvokeStaticTrampolineWithAccessCheck);
-  return NextInvokeInsnSP(cu, info, trampoline, state, target_method, 0);
+  if (Is64BitInstructionSet(cu->instruction_set)) {
+    ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8, pInvokeStaticTrampolineWithAccessCheck);
+    return NextInvokeInsnSP<8>(cu, info, trampoline, state, target_method, 0);
+  } else {
+    ThreadOffset<4> trampoline = QUICK_ENTRYPOINT_OFFSET(4, pInvokeStaticTrampolineWithAccessCheck);
+    return NextInvokeInsnSP<4>(cu, info, trampoline, state, target_method, 0);
+  }
 }
 
 static int NextDirectCallInsnSP(CompilationUnit* cu, CallInfo* info, int state,
                                 const MethodReference& target_method,
                                 uint32_t unused, uintptr_t unused2,
                                 uintptr_t unused3, InvokeType unused4) {
-  ThreadOffset<4> trampoline = QUICK_ENTRYPOINT_OFFSET(4, pInvokeDirectTrampolineWithAccessCheck);
-  return NextInvokeInsnSP(cu, info, trampoline, state, target_method, 0);
+  if (Is64BitInstructionSet(cu->instruction_set)) {
+    ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8, pInvokeDirectTrampolineWithAccessCheck);
+    return NextInvokeInsnSP<8>(cu, info, trampoline, state, target_method, 0);
+  } else {
+    ThreadOffset<4> trampoline = QUICK_ENTRYPOINT_OFFSET(4, pInvokeDirectTrampolineWithAccessCheck);
+    return NextInvokeInsnSP<4>(cu, info, trampoline, state, target_method, 0);
+  }
 }
 
 static int NextSuperCallInsnSP(CompilationUnit* cu, CallInfo* info, int state,
                                const MethodReference& target_method,
                                uint32_t unused, uintptr_t unused2,
                                uintptr_t unused3, InvokeType unused4) {
-  ThreadOffset<4> trampoline = QUICK_ENTRYPOINT_OFFSET(4, pInvokeSuperTrampolineWithAccessCheck);
-  return NextInvokeInsnSP(cu, info, trampoline, state, target_method, 0);
+  if (Is64BitInstructionSet(cu->instruction_set)) {
+    ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8, pInvokeSuperTrampolineWithAccessCheck);
+    return NextInvokeInsnSP<8>(cu, info, trampoline, state, target_method, 0);
+  } else {
+    ThreadOffset<4> trampoline = QUICK_ENTRYPOINT_OFFSET(4, pInvokeSuperTrampolineWithAccessCheck);
+    return NextInvokeInsnSP<4>(cu, info, trampoline, state, target_method, 0);
+  }
 }
 
 static int NextVCallInsnSP(CompilationUnit* cu, CallInfo* info, int state,
                            const MethodReference& target_method,
                            uint32_t unused, uintptr_t unused2,
                            uintptr_t unused3, InvokeType unused4) {
-  ThreadOffset<4> trampoline = QUICK_ENTRYPOINT_OFFSET(4, pInvokeVirtualTrampolineWithAccessCheck);
-  return NextInvokeInsnSP(cu, info, trampoline, state, target_method, 0);
+  if (Is64BitInstructionSet(cu->instruction_set)) {
+    ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8, pInvokeVirtualTrampolineWithAccessCheck);
+    return NextInvokeInsnSP<8>(cu, info, trampoline, state, target_method, 0);
+  } else {
+    ThreadOffset<4> trampoline = QUICK_ENTRYPOINT_OFFSET(4, pInvokeVirtualTrampolineWithAccessCheck);
+    return NextInvokeInsnSP<4>(cu, info, trampoline, state, target_method, 0);
+  }
 }
 
 static int NextInterfaceCallInsnWithAccessCheck(CompilationUnit* cu,
@@ -682,9 +787,13 @@
                                                 const MethodReference& target_method,
                                                 uint32_t unused, uintptr_t unused2,
                                                 uintptr_t unused3, InvokeType unused4) {
-  ThreadOffset<4> trampoline =
-      QUICK_ENTRYPOINT_OFFSET(4, pInvokeInterfaceTrampolineWithAccessCheck);
-  return NextInvokeInsnSP(cu, info, trampoline, state, target_method, 0);
+  if (Is64BitInstructionSet(cu->instruction_set)) {
+    ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8, pInvokeInterfaceTrampolineWithAccessCheck);
+    return NextInvokeInsnSP<8>(cu, info, trampoline, state, target_method, 0);
+  } else {
+    ThreadOffset<4> trampoline = QUICK_ENTRYPOINT_OFFSET(4, pInvokeInterfaceTrampolineWithAccessCheck);
+    return NextInvokeInsnSP<4>(cu, info, trampoline, state, target_method, 0);
+  }
 }
 
 int Mir2Lir::LoadArgRegs(CallInfo* info, int call_state,
@@ -692,8 +801,10 @@
                          const MethodReference& target_method,
                          uint32_t vtable_idx, uintptr_t direct_code,
                          uintptr_t direct_method, InvokeType type, bool skip_this) {
-  int last_arg_reg = TargetReg(kArg3).GetReg();
-  int next_reg = TargetReg(kArg1).GetReg();
+  int last_arg_reg = 3 - 1;
+  int arg_regs[3] = {TargetReg(kArg1).GetReg(), TargetReg(kArg2).GetReg(), TargetReg(kArg3).GetReg()};
+
+  int next_reg = 0;
   int next_arg = 0;
   if (skip_this) {
     next_reg++;
@@ -702,8 +813,8 @@
   for (; (next_reg <= last_arg_reg) && (next_arg < info->num_arg_words); next_reg++) {
     RegLocation rl_arg = info->args[next_arg++];
     rl_arg = UpdateRawLoc(rl_arg);
-    if (rl_arg.wide && (next_reg <= TargetReg(kArg2).GetReg())) {
-      RegStorage r_tmp(RegStorage::k64BitPair, next_reg, next_reg + 1);
+    if (rl_arg.wide && (next_reg <= last_arg_reg - 1)) {
+      RegStorage r_tmp(RegStorage::k64BitPair, arg_regs[next_reg], arg_regs[next_reg + 1]);
       LoadValueDirectWideFixed(rl_arg, r_tmp);
       next_reg++;
       next_arg++;
@@ -712,7 +823,7 @@
         rl_arg = NarrowRegLoc(rl_arg);
         rl_arg.is_const = false;
       }
-      LoadValueDirectFixed(rl_arg, RegStorage::Solo32(next_reg));
+      LoadValueDirectFixed(rl_arg, RegStorage::Solo32(arg_regs[next_reg]));
     }
     call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                                 direct_code, direct_method, type);
@@ -753,7 +864,8 @@
       // Wide spans, we need the 2nd half of uses[2].
       rl_arg = UpdateLocWide(rl_use2);
       if (rl_arg.location == kLocPhysReg) {
-        reg = rl_arg.reg.GetHigh();
+        // NOTE: not correct for 64-bit core regs, but this needs rewriting for hard-float.
+        reg = rl_arg.reg.IsPair() ? rl_arg.reg.GetHigh() : rl_arg.reg.DoubleToHighSingle();
       } else {
         // kArg2 & rArg3 can safely be used here
         reg = TargetReg(kArg3);
@@ -768,34 +880,28 @@
     }
     // Loop through the rest
     while (next_use < info->num_arg_words) {
-      RegStorage low_reg;
-      RegStorage high_reg;
+      RegStorage arg_reg;
       rl_arg = info->args[next_use];
       rl_arg = UpdateRawLoc(rl_arg);
       if (rl_arg.location == kLocPhysReg) {
-        if (rl_arg.wide) {
-          low_reg = rl_arg.reg.GetLow();
-          high_reg = rl_arg.reg.GetHigh();
-        } else {
-          low_reg = rl_arg.reg;
-        }
+        arg_reg = rl_arg.reg;
       } else {
-        low_reg = TargetReg(kArg2);
+        arg_reg = rl_arg.wide ? RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3)) :
+            TargetReg(kArg2);
         if (rl_arg.wide) {
-          high_reg = TargetReg(kArg3);
-          LoadValueDirectWideFixed(rl_arg, RegStorage::MakeRegPair(low_reg, high_reg));
+          LoadValueDirectWideFixed(rl_arg, arg_reg);
         } else {
-          LoadValueDirectFixed(rl_arg, low_reg);
+          LoadValueDirectFixed(rl_arg, arg_reg);
         }
         call_state = next_call_insn(cu_, info, call_state, target_method,
                                     vtable_idx, direct_code, direct_method, type);
       }
       int outs_offset = (next_use + 1) * 4;
       if (rl_arg.wide) {
-        StoreBaseDispWide(TargetReg(kSp), outs_offset, RegStorage::MakeRegPair(low_reg, high_reg));
+        StoreBaseDisp(TargetReg(kSp), outs_offset, arg_reg, k64);
         next_use += 2;
       } else {
-        Store32Disp(TargetReg(kSp), outs_offset, low_reg);
+        Store32Disp(TargetReg(kSp), outs_offset, arg_reg);
         next_use++;
       }
       call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
@@ -860,7 +966,7 @@
     if (loc.wide) {
       loc = UpdateLocWide(loc);
       if ((next_arg >= 2) && (loc.location == kLocPhysReg)) {
-        StoreBaseDispWide(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg);
+        StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64);
       }
       next_arg += 2;
     } else {
@@ -924,9 +1030,9 @@
         bytes_to_move = sizeof(uint32_t) * 4;
 
         // Allocate a free xmm temp. Since we are working through the calling sequence,
-        // we expect to have an xmm temporary available.
+        // we expect to have an xmm temporary available.  AllocTempDouble will abort if
+        // there are no free registers.
         RegStorage temp = AllocTempDouble();
-        CHECK_GT(temp.GetLowReg(), 0);
 
         LIR* ld1 = nullptr;
         LIR* ld2 = nullptr;
@@ -989,9 +1095,7 @@
         }
 
         // Free the temporary used for the data movement.
-        // CLEANUP: temp is currently a bogus pair, elmiminate extra free when updated.
-        FreeTemp(temp.GetLow());
-        FreeTemp(temp.GetHigh());
+        FreeTemp(temp);
       } else {
         // Moving 32-bits via general purpose register.
         bytes_to_move = sizeof(uint32_t);
@@ -1013,8 +1117,13 @@
     // Generate memcpy
     OpRegRegImm(kOpAdd, TargetReg(kArg0), TargetReg(kSp), outs_offset);
     OpRegRegImm(kOpAdd, TargetReg(kArg1), TargetReg(kSp), start_offset);
-    CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(4, pMemcpy), TargetReg(kArg0),
-                               TargetReg(kArg1), (info->num_arg_words - 3) * 4, false);
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(8, pMemcpy), TargetReg(kArg0),
+                                 TargetReg(kArg1), (info->num_arg_words - 3) * 4, false);
+    } else {
+      CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(4, pMemcpy), TargetReg(kArg0),
+                                 TargetReg(kArg1), (info->num_arg_words - 3) * 4, false);
+    }
   }
 
   call_state = LoadArgRegs(info, call_state, next_call_insn,
@@ -1136,8 +1245,7 @@
   if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
     LoadBaseIndexed(reg_ptr, reg_off, rl_result.reg, 1, kUnsignedHalf);
   } else {
-    LoadBaseIndexedDisp(reg_ptr, reg_off, 1, data_offset, rl_result.reg,
-                        RegStorage::InvalidReg(), kUnsignedHalf, INVALID_SREG);
+    LoadBaseIndexedDisp(reg_ptr, reg_off, 1, data_offset, rl_result.reg, kUnsignedHalf);
   }
   FreeTemp(reg_off);
   FreeTemp(reg_ptr);
@@ -1260,6 +1368,7 @@
   OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), sign_reg);
   OpRegReg(kOpXor, rl_result.reg.GetLow(), sign_reg);
   OpRegReg(kOpXor, rl_result.reg.GetHigh(), sign_reg);
+  FreeTemp(sign_reg);
   StoreValueWide(rl_dest, rl_result);
   return true;
 }
@@ -1345,7 +1454,9 @@
     RegLocation rl_start = info->args[2];     // 3rd arg only present in III flavor of IndexOf.
     LoadValueDirectFixed(rl_start, reg_start);
   }
-  RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pIndexOf));
+  RegStorage r_tgt = Is64BitInstructionSet(cu_->instruction_set) ?
+      LoadHelper(QUICK_ENTRYPOINT_OFFSET(8, pIndexOf)) :
+      LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pIndexOf));
   GenExplicitNullCheck(reg_ptr, info->opt_flags);
   LIR* high_code_point_branch =
       rl_char.is_const ? nullptr : OpCmpImmBranch(kCondGt, reg_char, 0xFFFF, nullptr);
@@ -1382,8 +1493,16 @@
   RegLocation rl_cmp = info->args[1];
   LoadValueDirectFixed(rl_this, reg_this);
   LoadValueDirectFixed(rl_cmp, reg_cmp);
-  RegStorage r_tgt = (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) ?
-      LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pStringCompareTo)) : RegStorage::InvalidReg();
+  RegStorage r_tgt;
+  if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(8, pStringCompareTo));
+    } else {
+      r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pStringCompareTo));
+    }
+  } else {
+    r_tgt = RegStorage::InvalidReg();
+  }
   GenExplicitNullCheck(reg_this, info->opt_flags);
   info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.
   // TUNING: check if rl_cmp.s_reg_low is already null checked
@@ -1393,7 +1512,11 @@
   if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
     OpReg(kOpBlx, r_tgt);
   } else {
-    OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pStringCompareTo));
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(8, pStringCompareTo));
+    } else {
+      OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pStringCompareTo));
+    }
   }
   RegLocation rl_return = GetReturn(false);
   RegLocation rl_dest = InlineTarget(info);
@@ -1404,12 +1527,32 @@
 bool Mir2Lir::GenInlinedCurrentThread(CallInfo* info) {
   RegLocation rl_dest = InlineTarget(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  ThreadOffset<4> offset = Thread::PeerOffset<4>();
-  if (cu_->instruction_set == kThumb2 || cu_->instruction_set == kMips) {
-    Load32Disp(TargetReg(kSelf), offset.Int32Value(), rl_result.reg);
-  } else {
-    CHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
-    reinterpret_cast<X86Mir2Lir*>(this)->OpRegThreadMem(kOpMov, rl_result.reg.GetReg(), offset);
+
+  switch (cu_->instruction_set) {
+    case kArm:
+      // Fall-through.
+    case kThumb2:
+      // Fall-through.
+    case kMips:
+      Load32Disp(TargetReg(kSelf), Thread::PeerOffset<4>().Int32Value(), rl_result.reg);
+      break;
+
+    case kArm64:
+      Load32Disp(TargetReg(kSelf), Thread::PeerOffset<8>().Int32Value(), rl_result.reg);
+      break;
+
+    case kX86:
+      reinterpret_cast<X86Mir2Lir*>(this)->OpRegThreadMem(kOpMov, rl_result.reg,
+                                                          Thread::PeerOffset<4>());
+      break;
+
+    case kX86_64:
+      reinterpret_cast<X86Mir2Lir*>(this)->OpRegThreadMem(kOpMov, rl_result.reg,
+                                                          Thread::PeerOffset<8>());
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected isa " << cu_->instruction_set;
   }
   StoreValue(rl_dest, rl_result);
   return true;
@@ -1431,14 +1574,13 @@
   RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (is_long) {
-    if (cu_->instruction_set == kX86) {
-      LoadBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_result.reg.GetLow(),
-                          rl_result.reg.GetHigh(), k64, INVALID_SREG);
+    if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
+      LoadBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_result.reg, k64);
     } else {
       RegStorage rl_temp_offset = AllocTemp();
       OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg);
-      LoadBaseDispWide(rl_temp_offset, 0, rl_result.reg, INVALID_SREG);
-      FreeTemp(rl_temp_offset.GetReg());
+      LoadBaseDisp(rl_temp_offset, 0, rl_result.reg, k64);
+      FreeTemp(rl_temp_offset);
     }
   } else {
     LoadBaseIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0, k32);
@@ -1479,14 +1621,13 @@
   RegLocation rl_value;
   if (is_long) {
     rl_value = LoadValueWide(rl_src_value, kCoreReg);
-    if (cu_->instruction_set == kX86) {
-      StoreBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_value.reg.GetLow(),
-                           rl_value.reg.GetHigh(), k64, INVALID_SREG);
+    if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
+      StoreBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_value.reg, k64);
     } else {
       RegStorage rl_temp_offset = AllocTemp();
       OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg);
-      StoreBaseDispWide(rl_temp_offset, 0, rl_value.reg);
-      FreeTemp(rl_temp_offset.GetReg());
+      StoreBaseDisp(rl_temp_offset, 0, rl_value.reg, k64);
+      FreeTemp(rl_temp_offset);
     }
   } else {
     rl_value = LoadValue(rl_src_value, kCoreReg);
@@ -1494,7 +1635,7 @@
   }
 
   // Free up the temp early, to ensure x86 doesn't run out of temporaries in MarkGCCard.
-  FreeTemp(rl_offset.reg.GetReg());
+  FreeTemp(rl_offset.reg);
 
   if (is_volatile) {
     // A load might follow the volatile store so insert a StoreLoad barrier.
@@ -1518,13 +1659,42 @@
     return;
   }
   DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
-  if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file)
-      ->GenIntrinsic(this, info)) {
-    return;
+  // TODO: Enable intrinsics for x86_64
+  // Temporarily disable intrinsics for x86_64. We will enable them later, step by step.
+  if (cu_->instruction_set != kX86_64) {
+    if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file)
+        ->GenIntrinsic(this, info)) {
+      return;
+    }
   }
   GenInvokeNoInline(info);
 }
 
+template <size_t pointer_size>
+static LIR* GenInvokeNoInlineCall(Mir2Lir* mir_to_lir, InvokeType type) {
+  ThreadOffset<pointer_size> trampoline(-1);
+  switch (type) {
+    case kInterface:
+      trampoline = QUICK_ENTRYPOINT_OFFSET(pointer_size, pInvokeInterfaceTrampolineWithAccessCheck);
+      break;
+    case kDirect:
+      trampoline = QUICK_ENTRYPOINT_OFFSET(pointer_size, pInvokeDirectTrampolineWithAccessCheck);
+      break;
+    case kStatic:
+      trampoline = QUICK_ENTRYPOINT_OFFSET(pointer_size, pInvokeStaticTrampolineWithAccessCheck);
+      break;
+    case kSuper:
+      trampoline = QUICK_ENTRYPOINT_OFFSET(pointer_size, pInvokeSuperTrampolineWithAccessCheck);
+      break;
+    case kVirtual:
+      trampoline = QUICK_ENTRYPOINT_OFFSET(pointer_size, pInvokeVirtualTrampolineWithAccessCheck);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected invoke type";
+  }
+  return mir_to_lir->OpThreadMem(kOpBlx, trampoline);
+}
+
 void Mir2Lir::GenInvokeNoInline(CallInfo* info) {
   int call_state = 0;
   LIR* null_ck;
@@ -1536,6 +1706,7 @@
 
   const MirMethodLoweringInfo& method_info = mir_graph_->GetMethodLoweringInfo(info->mir);
   cu_->compiler_driver->ProcessedInvoke(method_info.GetInvokeType(), method_info.StatsFlags());
+  BeginInvoke(info);
   InvokeType original_type = static_cast<InvokeType>(method_info.GetInvokeType());
   info->type = static_cast<InvokeType>(method_info.GetSharpType());
   bool fast_path = method_info.FastPath();
@@ -1592,29 +1763,15 @@
                           mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
       }
     } else {
-      ThreadOffset<4> trampoline(-1);
-      switch (info->type) {
-      case kInterface:
-        trampoline = QUICK_ENTRYPOINT_OFFSET(4, pInvokeInterfaceTrampolineWithAccessCheck);
-        break;
-      case kDirect:
-        trampoline = QUICK_ENTRYPOINT_OFFSET(4, pInvokeDirectTrampolineWithAccessCheck);
-        break;
-      case kStatic:
-        trampoline = QUICK_ENTRYPOINT_OFFSET(4, pInvokeStaticTrampolineWithAccessCheck);
-        break;
-      case kSuper:
-        trampoline = QUICK_ENTRYPOINT_OFFSET(4, pInvokeSuperTrampolineWithAccessCheck);
-        break;
-      case kVirtual:
-        trampoline = QUICK_ENTRYPOINT_OFFSET(4, pInvokeVirtualTrampolineWithAccessCheck);
-        break;
-      default:
-        LOG(FATAL) << "Unexpected invoke type";
+      // TODO: Extract?
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        call_inst = GenInvokeNoInlineCall<8>(this, info->type);
+      } else {
+        call_inst = GenInvokeNoInlineCall<4>(this, info->type);
       }
-      call_inst = OpThreadMem(kOpBlx, trampoline);
     }
   }
+  EndInvoke(info);
   MarkSafepointPC(call_inst);
 
   ClobberCallerSave();
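
The gen_invoke.cc hunks above convert the CallRuntimeHelperXXX family to templates on pointer_size and stamp out both widths with the INSTANTIATE macro. The usual reason for such explicit instantiations is that the template bodies live in a .cc file, so the <4> and <8> versions must be emitted there for other translation units to link against. A small self-contained sketch of that mechanism follows; Offset, Codegen and INSTANTIATE_SKETCH are simplified stand-ins, not the real ThreadOffset/Mir2Lir/INSTANTIATE.

    // Sketch: templated member defined in one TU plus explicit instantiations.
    #include <cstddef>
    #include <iostream>

    template <size_t kPointerSize>
    struct Offset {
      explicit Offset(int v) : value(v) {}
      int value;
    };

    struct Codegen {
      template <size_t kPointerSize>
      void CallRuntimeHelperImm(Offset<kPointerSize> helper_offset, int arg0, bool safepoint_pc);
    };

    // Definition lives in one translation unit (as in gen_invoke.cc).
    template <size_t kPointerSize>
    void Codegen::CallRuntimeHelperImm(Offset<kPointerSize> helper_offset, int arg0,
                                       bool safepoint_pc) {
      std::cout << "helper@" << helper_offset.value << " arg0=" << arg0
                << " safepoint=" << safepoint_pc << "\n";
    }

    // Same trick as the INSTANTIATE macro: stamp out both pointer sizes explicitly.
    #define INSTANTIATE_SKETCH(sig_part1, ...) \
      template sig_part1(Offset<4>, __VA_ARGS__); \
      template sig_part1(Offset<8>, __VA_ARGS__);

    INSTANTIATE_SKETCH(void Codegen::CallRuntimeHelperImm, int, bool)

    int main() {
      Codegen cg;
      cg.CallRuntimeHelperImm(Offset<4>(16), 7, true);
      cg.CallRuntimeHelperImm(Offset<8>(32), 7, false);
    }
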
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 9808f7f..f5e7e63 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -123,7 +123,7 @@
   } else {
     DCHECK((rl_src.location == kLocDalvikFrame) ||
            (rl_src.location == kLocCompilerTemp));
-    LoadBaseDispWide(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, INVALID_SREG);
+    LoadBaseDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, k64);
   }
 }
 
@@ -139,12 +139,26 @@
 }
 
 RegLocation Mir2Lir::LoadValue(RegLocation rl_src, RegisterClass op_kind) {
-  rl_src = EvalLoc(rl_src, op_kind, false);
-  if (IsInexpensiveConstant(rl_src) || rl_src.location != kLocPhysReg) {
-    LoadValueDirect(rl_src, rl_src.reg);
-    rl_src.location = kLocPhysReg;
-    MarkLive(rl_src.reg, rl_src.s_reg_low);
+  rl_src = UpdateLoc(rl_src);
+  if (rl_src.location == kLocPhysReg) {
+    if (!RegClassMatches(op_kind, rl_src.reg)) {
+      // Wrong register class, realloc, copy and transfer ownership.
+      RegStorage new_reg = AllocTypedTemp(rl_src.fp, op_kind);
+      OpRegCopy(new_reg, rl_src.reg);
+      // Clobber the old reg.
+      Clobber(rl_src.reg);
+      // ...and mark the new one live.
+      rl_src.reg = new_reg;
+      MarkLive(rl_src);
+    }
+    return rl_src;
   }
+
+  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
+  rl_src.reg = AllocTypedTemp(rl_src.fp, op_kind);
+  LoadValueDirect(rl_src, rl_src.reg);
+  rl_src.location = kLocPhysReg;
+  MarkLive(rl_src);
   return rl_src;
 }
 
@@ -184,12 +198,12 @@
   }
 
   // Dest is now live and dirty (until/if we flush it to home location)
-  MarkLive(rl_dest.reg, rl_dest.s_reg_low);
+  MarkLive(rl_dest);
   MarkDirty(rl_dest);
 
 
   ResetDefLoc(rl_dest);
-  if (IsDirty(rl_dest.reg) && oat_live_out(rl_dest.s_reg_low)) {
+  if (IsDirty(rl_dest.reg) && LiveOut(rl_dest.s_reg_low)) {
     def_start = last_lir_insn_;
     Store32Disp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
     MarkClean(rl_dest);
@@ -203,18 +217,27 @@
 
 RegLocation Mir2Lir::LoadValueWide(RegLocation rl_src, RegisterClass op_kind) {
   DCHECK(rl_src.wide);
-  rl_src = EvalLoc(rl_src, op_kind, false);
-  if (IsInexpensiveConstant(rl_src) || rl_src.location != kLocPhysReg) {
-    LoadValueDirectWide(rl_src, rl_src.reg);
-    rl_src.location = kLocPhysReg;
-    MarkLive(rl_src.reg.GetLow(), rl_src.s_reg_low);
-    if (rl_src.reg.GetLowReg() != rl_src.reg.GetHighReg()) {
-      MarkLive(rl_src.reg.GetHigh(), GetSRegHi(rl_src.s_reg_low));
-    } else {
-      // This must be an x86 vector register value.
-      DCHECK(IsFpReg(rl_src.reg) && (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64));
+  rl_src = UpdateLocWide(rl_src);
+  if (rl_src.location == kLocPhysReg) {
+    if (!RegClassMatches(op_kind, rl_src.reg)) {
+      // Wrong register class, realloc, copy and transfer ownership.
+      RegStorage new_regs = AllocTypedTempWide(rl_src.fp, op_kind);
+      OpRegCopyWide(new_regs, rl_src.reg);
+      // Clobber the old regs.
+      Clobber(rl_src.reg);
+      // ...and mark the new ones live.
+      rl_src.reg = new_regs;
+      MarkLive(rl_src);
     }
+    return rl_src;
   }
+
+  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
+  DCHECK_NE(GetSRegHi(rl_src.s_reg_low), INVALID_SREG);
+  rl_src.reg = AllocTypedTempWide(rl_src.fp, op_kind);
+  LoadValueDirectWide(rl_src, rl_src.reg);
+  rl_src.location = kLocPhysReg;
+  MarkLive(rl_src);
   return rl_src;
 }
 
@@ -239,7 +262,11 @@
     if (IsLive(rl_src.reg) ||
         IsPromoted(rl_src.reg) ||
         (rl_dest.location == kLocPhysReg)) {
-      // Src is live or promoted or Dest has assigned reg.
+      /*
+       * If src reg[s] are tied to the original Dalvik vreg via liveness or promotion, we
+       * can't repurpose them.  Similarly, if the dest reg[s] are tied to Dalvik vregs via
+       * promotion, we can't just re-assign.  In these cases, we have to copy.
+       */
       rl_dest = EvalLoc(rl_dest, kAnyReg, false);
       OpRegCopyWide(rl_dest.reg, rl_src.reg);
     } else {
@@ -254,28 +281,17 @@
   }
 
   // Dest is now live and dirty (until/if we flush it to home location)
-  MarkLive(rl_dest.reg.GetLow(), rl_dest.s_reg_low);
-
-  // Does this wide value live in two registers (or one vector one)?
-  // FIXME: wide reg update.
-  if (rl_dest.reg.GetLowReg() != rl_dest.reg.GetHighReg()) {
-    MarkLive(rl_dest.reg.GetHigh(), GetSRegHi(rl_dest.s_reg_low));
-    MarkDirty(rl_dest);
-    MarkPair(rl_dest.reg.GetLowReg(), rl_dest.reg.GetHighReg());
-  } else {
-    // This must be an x86 vector register value,
-    DCHECK(IsFpReg(rl_dest.reg) && (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64));
-    MarkDirty(rl_dest);
-  }
-
+  MarkLive(rl_dest);
+  MarkWide(rl_dest.reg);
+  MarkDirty(rl_dest);
 
   ResetDefLocWide(rl_dest);
-  if (IsDirty(rl_dest.reg) && (oat_live_out(rl_dest.s_reg_low) ||
-      oat_live_out(GetSRegHi(rl_dest.s_reg_low)))) {
+  if (IsDirty(rl_dest.reg) && (LiveOut(rl_dest.s_reg_low) ||
+      LiveOut(GetSRegHi(rl_dest.s_reg_low)))) {
     def_start = last_lir_insn_;
     DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1),
               mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low)));
-    StoreBaseDispWide(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
+    StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64);
     MarkClean(rl_dest);
     def_end = last_lir_insn_;
     MarkDefWide(rl_dest, def_start, def_end);
@@ -295,13 +311,12 @@
   }
 
   // Dest is now live and dirty (until/if we flush it to home location)
-  MarkLive(rl_dest.reg, rl_dest.s_reg_low);
+  MarkLive(rl_dest);
   MarkDirty(rl_dest);
 
 
   ResetDefLoc(rl_dest);
-  if (IsDirty(rl_dest.reg) &&
-      oat_live_out(rl_dest.s_reg_low)) {
+  if (IsDirty(rl_dest.reg) && LiveOut(rl_dest.s_reg_low)) {
     LIR *def_start = last_lir_insn_;
     Store32Disp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
     MarkClean(rl_dest);
@@ -314,7 +329,6 @@
 }
 
 void Mir2Lir::StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src) {
-  DCHECK_EQ(IsFpReg(rl_src.reg.GetLowReg()), IsFpReg(rl_src.reg.GetHighReg()));
   DCHECK(rl_dest.wide);
   DCHECK(rl_src.wide);
   DCHECK_EQ(rl_src.location, kLocPhysReg);
@@ -325,32 +339,21 @@
     // Just re-assign the registers.  Dest gets Src's regs.
     rl_dest.location = kLocPhysReg;
     rl_dest.reg = rl_src.reg;
-    Clobber(rl_src.reg.GetLowReg());
-    Clobber(rl_src.reg.GetHighReg());
+    Clobber(rl_src.reg);
   }
 
   // Dest is now live and dirty (until/if we flush it to home location).
-  MarkLive(rl_dest.reg.GetLow(), rl_dest.s_reg_low);
-
-  // Does this wide value live in two registers (or one vector one)?
-  // FIXME: wide reg.
-  if (rl_dest.reg.GetLowReg() != rl_dest.reg.GetHighReg()) {
-    MarkLive(rl_dest.reg.GetHigh(), GetSRegHi(rl_dest.s_reg_low));
-    MarkDirty(rl_dest);
-    MarkPair(rl_dest.reg.GetLowReg(), rl_dest.reg.GetHighReg());
-  } else {
-    // This must be an x86 vector register value,
-    DCHECK(IsFpReg(rl_dest.reg) && (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64));
-    MarkDirty(rl_dest);
-  }
+  MarkLive(rl_dest);
+  MarkWide(rl_dest.reg);
+  MarkDirty(rl_dest);
 
   ResetDefLocWide(rl_dest);
-  if (IsDirty(rl_dest.reg) && (oat_live_out(rl_dest.s_reg_low) ||
-      oat_live_out(GetSRegHi(rl_dest.s_reg_low)))) {
+  if (IsDirty(rl_dest.reg) && (LiveOut(rl_dest.s_reg_low) ||
+      LiveOut(GetSRegHi(rl_dest.s_reg_low)))) {
     LIR *def_start = last_lir_insn_;
     DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1),
               mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low)));
-    StoreBaseDispWide(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
+    StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64);
     MarkClean(rl_dest);
     LIR *def_end = last_lir_insn_;
     MarkDefWide(rl_dest, def_start, def_end);
@@ -369,7 +372,7 @@
 RegLocation Mir2Lir::ForceTemp(RegLocation loc) {
   DCHECK(!loc.wide);
   DCHECK(loc.location == kLocPhysReg);
-  DCHECK(!IsFpReg(loc.reg));
+  DCHECK(!loc.reg.IsFloat());
   if (IsTemp(loc.reg)) {
     Clobber(loc.reg);
   } else {
@@ -383,21 +386,20 @@
   return loc;
 }
 
-// FIXME: wide regs.
+// FIXME: will need an update for 64-bit core regs.
 RegLocation Mir2Lir::ForceTempWide(RegLocation loc) {
   DCHECK(loc.wide);
   DCHECK(loc.location == kLocPhysReg);
-  DCHECK(!IsFpReg(loc.reg.GetLowReg()));
-  DCHECK(!IsFpReg(loc.reg.GetHighReg()));
-  if (IsTemp(loc.reg.GetLowReg())) {
-    Clobber(loc.reg.GetLowReg());
+  DCHECK(!loc.reg.IsFloat());
+  if (IsTemp(loc.reg.GetLow())) {
+    Clobber(loc.reg.GetLow());
   } else {
     RegStorage temp_low = AllocTemp();
     OpRegCopy(temp_low, loc.reg.GetLow());
     loc.reg.SetLowReg(temp_low.GetReg());
   }
-  if (IsTemp(loc.reg.GetHighReg())) {
-    Clobber(loc.reg.GetHighReg());
+  if (IsTemp(loc.reg.GetHigh())) {
+    Clobber(loc.reg.GetHigh());
   } else {
     RegStorage temp_high = AllocTemp();
     OpRegCopy(temp_high, loc.reg.GetHigh());
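
The reworked LoadValue/LoadValueWide above follows a three-way decision: if the value already sits in a physical register of an acceptable class, keep it; if it is in a register of the wrong class, allocate a typed temp, copy, clobber the old register and mark the new one live; otherwise allocate a temp and load from the frame slot. The sketch below models only that control flow, with Loc, RegClass and the string register names as invented simplifications of RegLocation/RegStorage.

    // Sketch of the register-class reconciliation in the reworked LoadValue.
    #include <iostream>
    #include <string>

    enum class RegClass { kCore, kFp, kAny };

    struct Loc {
      bool in_phys_reg;
      bool is_fp_reg;
      std::string reg;
    };

    bool RegClassMatches(RegClass wanted, const Loc& loc) {
      return wanted == RegClass::kAny || (wanted == RegClass::kFp) == loc.is_fp_reg;
    }

    std::string AllocTypedTemp(RegClass wanted) {
      return wanted == RegClass::kFp ? "f0" : "r0";
    }

    Loc LoadValue(Loc loc, RegClass wanted) {
      if (loc.in_phys_reg) {
        if (!RegClassMatches(wanted, loc)) {
          // Wrong register class: realloc, copy, transfer ownership to the new reg.
          std::string new_reg = AllocTypedTemp(wanted);
          std::cout << "copy " << loc.reg << " -> " << new_reg << "\n";
          loc.reg = new_reg;
          loc.is_fp_reg = (wanted == RegClass::kFp);
        }
        return loc;  // Already live in an acceptable register.
      }
      // Not register-resident: allocate a temp of the requested class and load it.
      loc.reg = AllocTypedTemp(wanted);
      loc.is_fp_reg = (wanted == RegClass::kFp);
      std::cout << "load frame slot -> " << loc.reg << "\n";
      loc.in_phys_reg = true;
      return loc;
    }

    int main() {
      Loc in_core{true, false, "r5"};
      LoadValue(in_core, RegClass::kFp);    // wrong class: copy r5 -> f0
      Loc spilled{false, false, "<none>"};
      LoadValue(spilled, RegClass::kCore);  // not resident: load from frame
    }
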
diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc
index 4bdc9fa..4a918a1 100644
--- a/compiler/dex/quick/local_optimizations.cc
+++ b/compiler/dex/quick/local_optimizations.cc
@@ -163,7 +163,7 @@
           DCHECK(!(check_flags & IS_STORE));
           /* Same value && same register type */
           if (check_lir->flags.alias_info == this_lir->flags.alias_info &&
-              SameRegType(check_lir->operands[0], native_reg_id)) {
+              RegStorage::SameRegType(check_lir->operands[0], native_reg_id)) {
             /*
              * Different destination register - insert
              * a move
@@ -179,7 +179,7 @@
           /* Must alias */
           if (check_lir->flags.alias_info == this_lir->flags.alias_info) {
             /* Only optimize compatible registers */
-            bool reg_compatible = SameRegType(check_lir->operands[0], native_reg_id);
+            bool reg_compatible = RegStorage::SameRegType(check_lir->operands[0], native_reg_id);
             if ((is_this_lir_load && is_check_lir_load) ||
                 (!is_this_lir_load && is_check_lir_load)) {
               /* RAR or RAW */
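
The local_optimizations.cc change asks RegStorage::SameRegType() whether two LIR operands name registers of the same kind before forwarding a value between them. One common way such a check works is to pack the register number and its shape (core vs. float, width) into a single operand word and compare the shape bits; the bit layout below is invented for illustration and is not ART's actual RegStorage encoding.

    // Sketch of a shape-bit comparison behind a SameRegType-style predicate.
    #include <cassert>
    #include <cstdint>

    namespace sketch {

    // Hypothetical layout: low 6 bits = register number, next 2 bits = shape.
    constexpr uint32_t kNumMask   = 0x3f;
    constexpr uint32_t kShapeMask = 0xc0;
    constexpr uint32_t kCore32    = 0x00;
    constexpr uint32_t kFloat32   = 0x40;

    inline int RegNum(uint32_t operand) { return static_cast<int>(operand & kNumMask); }
    inline bool SameRegType(uint32_t a, uint32_t b) {
      return (a & kShapeMask) == (b & kShapeMask);
    }

    }  // namespace sketch

    int main() {
      uint32_t r2 = sketch::kCore32 | 2;
      uint32_t r5 = sketch::kCore32 | 5;
      uint32_t f2 = sketch::kFloat32 | 2;
      assert(sketch::SameRegType(r2, r5));   // both 32-bit core: forwarding is legal
      assert(!sketch::SameRegType(r2, f2));  // core vs. float: do not forward
      assert(sketch::RegNum(f2) == 2);
      return 0;
    }
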
diff --git a/compiler/dex/quick/mips/README.mips b/compiler/dex/quick/mips/README.mips
index 061c157..ff561fa 100644
--- a/compiler/dex/quick/mips/README.mips
+++ b/compiler/dex/quick/mips/README.mips
@@ -17,7 +17,7 @@
       code generation for switch tables, fill array data, 64-bit
       data handling and the register usage conventions.
 
-    o The memory model.  Verify that oatGenMemoryBarrier() generates the
+    o The memory model.  Verify that GenMemoryBarrier() generates the
       appropriate flavor of sync.
 
 Register promotion
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
index a579254..b26ab57 100644
--- a/compiler/dex/quick/mips/assemble_mips.cc
+++ b/compiler/dex/quick/mips/assemble_mips.cc
@@ -672,16 +672,17 @@
           bits |= (value << encoder->field_loc[i].end);
           break;
         case kFmtDfp: {
-          DCHECK(MIPS_DOUBLEREG(operand));
+          // TODO: do we need to adjust now that we're using 64BitSolo?
+          DCHECK(RegStorage::IsDouble(operand)) << ", Operand = 0x" << std::hex << operand;
           DCHECK_EQ((operand & 0x1), 0U);
-          value = ((operand & MIPS_FP_REG_MASK) << encoder->field_loc[i].start) &
+          value = (RegStorage::RegNum(operand) << encoder->field_loc[i].start) &
               ((1 << (encoder->field_loc[i].end + 1)) - 1);
           bits |= value;
           break;
         }
         case kFmtSfp:
-          DCHECK(MIPS_SINGLEREG(operand));
-          value = ((operand & MIPS_FP_REG_MASK) << encoder->field_loc[i].start) &
+          DCHECK(RegStorage::IsSingle(operand)) << ", Operand = 0x" << std::hex << operand;
+          value = (RegStorage::RegNum(operand) << encoder->field_loc[i].start) &
               ((1 << (encoder->field_loc[i].end + 1)) - 1);
           bits |= value;
           break;
@@ -747,7 +748,7 @@
   int offset = AssignInsnOffsets();
 
   /* Const values have to be word aligned */
-  offset = (offset + 3) & ~3;
+  offset = RoundUp(offset, 4);
 
   /* Set up offsets for literals */
   data_offset_ = offset;
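
The assembler change above replaces the hand-written "(offset + 3) & ~3" with RoundUp(offset, 4). For power-of-two alignments the two are equivalent, which the short check below demonstrates; RoundUpSketch is a local stand-in written for this note, not ART's actual utility function.

    // Sketch: a power-of-two RoundUp matches the mask form used previously.
    #include <cassert>
    #include <cstdint>

    int32_t RoundUpSketch(int32_t x, int32_t n) {
      // n must be a power of two for the mask form to be valid.
      assert(n > 0 && (n & (n - 1)) == 0);
      return (x + n - 1) & ~(n - 1);
    }

    int main() {
      for (int32_t offset = 0; offset < 64; ++offset) {
        assert(RoundUpSketch(offset, 4) == ((offset + 3) & ~3));
      }
      assert(RoundUpSketch(13, 4) == 16);  // literal pool offsets get word aligned
      return 0;
    }
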
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index df13882..3af3715 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -295,10 +295,10 @@
    * expanding the frame or flushing.  This leaves the utility
    * code with a single temp: r12.  This should be enough.
    */
-  LockTemp(rMIPS_ARG0);
-  LockTemp(rMIPS_ARG1);
-  LockTemp(rMIPS_ARG2);
-  LockTemp(rMIPS_ARG3);
+  LockTemp(rs_rMIPS_ARG0);
+  LockTemp(rs_rMIPS_ARG1);
+  LockTemp(rs_rMIPS_ARG2);
+  LockTemp(rs_rMIPS_ARG3);
 
   /*
    * We can safely skip the stack overflow check if we're
@@ -351,10 +351,10 @@
 
   FlushIns(ArgLocs, rl_method);
 
-  FreeTemp(rMIPS_ARG0);
-  FreeTemp(rMIPS_ARG1);
-  FreeTemp(rMIPS_ARG2);
-  FreeTemp(rMIPS_ARG3);
+  FreeTemp(rs_rMIPS_ARG0);
+  FreeTemp(rs_rMIPS_ARG1);
+  FreeTemp(rs_rMIPS_ARG2);
+  FreeTemp(rs_rMIPS_ARG3);
 }
 
 void MipsMir2Lir::GenExitSequence() {
@@ -362,8 +362,8 @@
    * In the exit path, rMIPS_RET0/rMIPS_RET1 are live - make sure they aren't
    * allocated by the register utilities as temps.
    */
-  LockTemp(rMIPS_RET0);
-  LockTemp(rMIPS_RET1);
+  LockTemp(rs_rMIPS_RET0);
+  LockTemp(rs_rMIPS_RET1);
 
   NewLIR0(kPseudoMethodExit);
   UnSpillCoreRegs();
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 81d6782..e462173 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -31,32 +31,29 @@
                             RegLocation rl_dest, int lit);
     bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
     LIR* CheckSuspendUsingLoad() OVERRIDE;
-    RegStorage LoadHelper(ThreadOffset<4> offset);
-    LIR* LoadBaseDisp(int r_base, int displacement, int r_dest, OpSize size, int s_reg);
-    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
-                      int s_reg);
-    LIR* LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest, int s_reg);
+    RegStorage LoadHelper(ThreadOffset<4> offset) OVERRIDE;
+    RegStorage LoadHelper(ThreadOffset<8> offset) OVERRIDE;
+    LIR* LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest,
+                              OpSize size) OVERRIDE;
+    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+                      OpSize size) OVERRIDE;
     LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
-                         OpSize size);
+                         OpSize size) OVERRIDE;
     LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                             RegStorage r_dest, RegStorage r_dest_hi, OpSize size, int s_reg);
+                             RegStorage r_dest, OpSize size) OVERRIDE;
     LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
     LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
-    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
-    LIR* StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src);
+    LIR* StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src,
+                               OpSize size) OVERRIDE;
+    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+                       OpSize size) OVERRIDE;
     LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
-                          OpSize size);
+                          OpSize size) OVERRIDE;
     LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                              RegStorage r_src, RegStorage r_src_hi, OpSize size, int s_reg);
+                              RegStorage r_src, OpSize size) OVERRIDE;
     void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
 
     // Required for target - register utilities.
-    bool IsFpReg(int reg);
-    bool IsFpReg(RegStorage reg);
-    bool SameRegType(int reg1, int reg2);
-    RegStorage AllocTypedTemp(bool fp_hint, int reg_class);
-    RegStorage AllocTypedTempWide(bool fp_hint, int reg_class);
-    int S2d(int low_reg, int high_reg);
     RegStorage TargetReg(SpecialTargetRegister reg);
     RegStorage GetArgMappingToPhysicalReg(int arg_num);
     RegLocation GetReturnAlt();
@@ -65,16 +62,13 @@
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
-    uint32_t FpRegMask();
-    uint64_t GetRegMaskCommon(int reg);
+    uint64_t GetRegMaskCommon(RegStorage reg);
     void AdjustSpillMask();
     void ClobberCallerSave();
-    void FlushReg(RegStorage reg);
-    void FlushRegWide(RegStorage reg);
     void FreeCallTemps();
-    void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free);
     void LockCallTemps();
-    void MarkPreservedSingle(int v_reg, int reg);
+    void MarkPreservedSingle(int v_reg, RegStorage reg);
+    void MarkPreservedDouble(int v_reg, RegStorage reg);
     void CompilerInitializeRegAlloc();
 
     // Required for target - miscellaneous.
@@ -92,6 +86,11 @@
     int GetInsnSize(LIR* lir);
     bool IsUnconditionalBranch(LIR* lir);
 
+    // Check support for volatile load/store of a given size.
+    bool SupportsVolatileLoadStore(OpSize size) OVERRIDE;
+    // Get the register class for load/store of a field.
+    RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
+
     // Required for target - Dalvik-level generators.
     void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2);
@@ -119,6 +118,7 @@
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
+    void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
     void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                    RegLocation rl_src2);
@@ -126,6 +126,8 @@
                     RegLocation rl_src2);
     void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                     RegLocation rl_src2);
+    void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
+                       RegLocation rl_src2, bool is_div);
     RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
     RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
     void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
@@ -137,7 +139,7 @@
     void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
     void GenSelect(BasicBlock* bb, MIR* mir);
-    void GenMemBarrier(MemBarrierKind barrier_kind);
+    bool GenMemBarrier(MemBarrierKind barrier_kind);
     void GenMoveException(RegLocation rl_dest);
     void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
                                        int first_bit, int second_bit);
@@ -170,16 +172,18 @@
     LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value);
     LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2);
     LIR* OpTestSuspend(LIR* target);
-    LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset);
+    LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) OVERRIDE;
+    LIR* OpThreadMem(OpKind op, ThreadOffset<8> thread_offset) OVERRIDE;
     LIR* OpVldm(RegStorage r_base, int count);
     LIR* OpVstm(RegStorage r_base, int count);
     void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset);
     void OpRegCopyWide(RegStorage dest, RegStorage src);
-    void OpTlsCmp(ThreadOffset<4> offset, int val);
+    void OpTlsCmp(ThreadOffset<4> offset, int val) OVERRIDE;
+    void OpTlsCmp(ThreadOffset<8> offset, int val) OVERRIDE;
 
     // TODO: collapse r_dest.
     LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
-                          RegStorage r_dest_hi, OpSize size, int s_reg);
+                          RegStorage r_dest_hi, OpSize size);
     // TODO: collapse r_src.
     LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
                            RegStorage r_src_hi, OpSize size);
diff --git a/compiler/dex/quick/mips/fp_mips.cc b/compiler/dex/quick/mips/fp_mips.cc
index a479dc7..9fffb2f 100644
--- a/compiler/dex/quick/mips/fp_mips.cc
+++ b/compiler/dex/quick/mips/fp_mips.cc
@@ -111,15 +111,13 @@
   rl_result = EvalLoc(rl_dest, kFPReg, true);
   DCHECK(rl_dest.wide);
   DCHECK(rl_result.wide);
-  NewLIR3(op, S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()), S2d(rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg()),
-          S2d(rl_src2.reg.GetLowReg(), rl_src2.reg.GetHighReg()));
+  NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
   StoreValueWide(rl_dest, rl_result);
 }
 
 void MipsMir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest,
                                 RegLocation rl_src) {
   int op = kMipsNop;
-  int src_reg;
   RegLocation rl_result;
   switch (opcode) {
     case Instruction::INT_TO_FLOAT:
@@ -157,18 +155,14 @@
   }
   if (rl_src.wide) {
     rl_src = LoadValueWide(rl_src, kFPReg);
-    src_reg = S2d(rl_src.reg.GetLowReg(), rl_src.reg.GetHighReg());
   } else {
     rl_src = LoadValue(rl_src, kFPReg);
-    src_reg = rl_src.reg.GetReg();
   }
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
   if (rl_dest.wide) {
-    rl_result = EvalLoc(rl_dest, kFPReg, true);
-    NewLIR2(op, S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()), src_reg);
     StoreValueWide(rl_dest, rl_result);
   } else {
-    rl_result = EvalLoc(rl_dest, kFPReg, true);
-    NewLIR2(op, rl_result.reg.GetReg(), src_reg);
     StoreValue(rl_dest, rl_result);
   }
 }
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 7c0becd..beaf6bb 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -44,16 +44,16 @@
                              RegLocation rl_src2) {
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-  int t0 = AllocTemp().GetReg();
-  int t1 = AllocTemp().GetReg();
+  RegStorage t0 = AllocTemp();
+  RegStorage t1 = AllocTemp();
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  NewLIR3(kMipsSlt, t0, rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
-  NewLIR3(kMipsSlt, t1, rl_src2.reg.GetHighReg(), rl_src1.reg.GetHighReg());
-  NewLIR3(kMipsSubu, rl_result.reg.GetReg(), t1, t0);
+  NewLIR3(kMipsSlt, t0.GetReg(), rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
+  NewLIR3(kMipsSlt, t1.GetReg(), rl_src2.reg.GetHighReg(), rl_src1.reg.GetHighReg());
+  NewLIR3(kMipsSubu, rl_result.reg.GetReg(), t1.GetReg(), t0.GetReg());
   LIR* branch = OpCmpImmBranch(kCondNe, rl_result.reg, 0, NULL);
-  NewLIR3(kMipsSltu, t0, rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg());
-  NewLIR3(kMipsSltu, t1, rl_src2.reg.GetLowReg(), rl_src1.reg.GetLowReg());
-  NewLIR3(kMipsSubu, rl_result.reg.GetReg(), t1, t0);
+  NewLIR3(kMipsSltu, t0.GetReg(), rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg());
+  NewLIR3(kMipsSltu, t1.GetReg(), rl_src2.reg.GetLowReg(), rl_src1.reg.GetLowReg());
+  NewLIR3(kMipsSubu, rl_result.reg.GetReg(), t1.GetReg(), t0.GetReg());
   FreeTemp(t0);
   FreeTemp(t1);
   LIR* target = NewLIR0(kPseudoTargetLabel);
@@ -114,13 +114,13 @@
   if (cmp_zero) {
     branch = NewLIR2(br_op, src1.GetReg(), src2.GetReg());
   } else {
-    int t_reg = AllocTemp().GetReg();
+    RegStorage t_reg = AllocTemp();
     if (swapped) {
-      NewLIR3(slt_op, t_reg, src2.GetReg(), src1.GetReg());
+      NewLIR3(slt_op, t_reg.GetReg(), src2.GetReg(), src1.GetReg());
     } else {
-      NewLIR3(slt_op, t_reg, src1.GetReg(), src2.GetReg());
+      NewLIR3(slt_op, t_reg.GetReg(), src1.GetReg(), src2.GetReg());
     }
-    branch = NewLIR1(br_op, t_reg);
+    branch = NewLIR1(br_op, t_reg.GetReg());
     FreeTemp(t_reg);
   }
   branch->target = target;
@@ -167,7 +167,7 @@
   if (r_src.IsPair()) {
     r_src = r_src.GetLow();
   }
-  if (MIPS_FPREG(r_dest.GetReg()) || MIPS_FPREG(r_src.GetReg()))
+  if (r_dest.IsFloat() || r_src.IsFloat())
     return OpFpRegCopy(r_dest, r_src);
   LIR* res = RawLIR(current_dalvik_offset_, kMipsMove,
             r_dest.GetReg(), r_src.GetReg());
@@ -186,17 +186,15 @@
 
 void MipsMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
   if (r_dest != r_src) {
-    bool dest_fp = MIPS_FPREG(r_dest.GetLowReg());
-    bool src_fp = MIPS_FPREG(r_src.GetLowReg());
+    bool dest_fp = r_dest.IsFloat();
+    bool src_fp = r_src.IsFloat();
     if (dest_fp) {
       if (src_fp) {
-        // FIXME: handle this here - reserve OpRegCopy for 32-bit copies.
-        OpRegCopy(RegStorage::Solo64(S2d(r_dest.GetLowReg(), r_dest.GetHighReg())),
-                  RegStorage::Solo64(S2d(r_src.GetLowReg(), r_src.GetHighReg())));
-        } else {
-          /* note the operands are swapped for the mtc1 instr */
-          NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg());
-          NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg());
+        OpRegCopy(r_dest, r_src);
+      } else {
+        /* note the operands are swapped for the mtc1 instr */
+        NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg());
+        NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg());
       }
     } else {
       if (src_fp) {
@@ -238,9 +236,9 @@
 
 RegLocation MipsMir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit,
                                        bool is_div) {
-  int t_reg = AllocTemp().GetReg();
-  NewLIR3(kMipsAddiu, t_reg, rZERO, lit);
-  NewLIR2(kMipsDiv, reg1.GetReg(), t_reg);
+  RegStorage t_reg = AllocTemp();
+  NewLIR3(kMipsAddiu, t_reg.GetReg(), rZERO, lit);
+  NewLIR2(kMipsDiv, reg1.GetReg(), t_reg.GetReg());
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (is_div) {
     NewLIR1(kMipsMflo, rl_result.reg.GetReg());
@@ -271,6 +269,10 @@
   LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm";
 }
 
+void MipsMir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
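+  // 64-bit thread offsets are not used by the 32-bit MIPS backend.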
+  UNIMPLEMENTED(FATAL) << "Should not be called.";
+}
+
 bool MipsMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
   DCHECK_NE(cu_->instruction_set, kThumb2);
   return false;
@@ -292,7 +294,7 @@
   RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   DCHECK(size == kSignedByte);
-  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
+  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size);
   StoreValue(rl_dest, rl_result);
   return true;
 }
@@ -429,6 +431,15 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
+void MipsMir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
+  LOG(FATAL) << "Unexpected use GenNotLong()";
+}
+
+void MipsMir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
+                                RegLocation rl_src2, bool is_div) {
+  LOG(FATAL) << "Unexpected use of GenDivRemLong()";
+}
+
 void MipsMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
   rl_src = LoadValueWide(rl_src, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -470,7 +481,7 @@
  */
 void MipsMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                           RegLocation rl_index, RegLocation rl_dest, int scale) {
-  RegisterClass reg_class = oat_reg_class_by_size(size);
+  RegisterClass reg_class = RegClassBySize(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
   RegLocation rl_result;
@@ -496,7 +507,7 @@
   }
   /* reg_ptr -> array data */
   OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
-  FreeTemp(rl_array.reg.GetReg());
+  FreeTemp(rl_array.reg);
   if ((size == k64) || (size == kDouble)) {
     if (scale) {
       RegStorage r_new_index = AllocTemp();
@@ -513,7 +524,7 @@
       GenArrayBoundsCheck(rl_index.reg, reg_len);
       FreeTemp(reg_len);
     }
-    LoadBaseDispWide(reg_ptr, 0, rl_result.reg, INVALID_SREG);
+    LoadBaseDisp(reg_ptr, 0, rl_result.reg, size);
 
     FreeTemp(reg_ptr);
     StoreValueWide(rl_dest, rl_result);
@@ -537,7 +548,7 @@
  */
 void MipsMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
                           RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
-  RegisterClass reg_class = oat_reg_class_by_size(size);
+  RegisterClass reg_class = RegClassBySize(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
 
@@ -551,8 +562,8 @@
   rl_index = LoadValue(rl_index, kCoreReg);
   RegStorage reg_ptr;
   bool allocated_reg_ptr_temp = false;
-  if (IsTemp(rl_array.reg.GetReg()) && !card_mark) {
-    Clobber(rl_array.reg.GetReg());
+  if (IsTemp(rl_array.reg) && !card_mark) {
+    Clobber(rl_array.reg);
     reg_ptr = rl_array.reg;
   } else {
     reg_ptr = AllocTemp();
@@ -591,7 +602,7 @@
       FreeTemp(reg_len);
     }
 
-    StoreBaseDispWide(reg_ptr, 0, rl_src.reg);
+    StoreBaseDisp(reg_ptr, 0, rl_src.reg, size);
   } else {
     rl_src = LoadValue(rl_src, reg_class);
     if (needs_range_check) {
diff --git a/compiler/dex/quick/mips/mips_lir.h b/compiler/dex/quick/mips/mips_lir.h
index c5150ee..5b2cb9d 100644
--- a/compiler/dex/quick/mips/mips_lir.h
+++ b/compiler/dex/quick/mips/mips_lir.h
@@ -86,26 +86,6 @@
  * +========================+
  */
 
-// Offset to distingish FP regs.
-#define MIPS_FP_REG_OFFSET 32
-// Offset to distinguish DP FP regs.
-#define MIPS_FP_DOUBLE 64
-// Reg types.
-#define MIPS_REGTYPE(x) (x & (MIPS_FP_REG_OFFSET | MIPS_FP_DOUBLE))
-#define MIPS_FPREG(x) ((x & MIPS_FP_REG_OFFSET) == MIPS_FP_REG_OFFSET)
-#define MIPS_DOUBLEREG(x) ((x & MIPS_FP_DOUBLE) == MIPS_FP_DOUBLE)
-#define MIPS_SINGLEREG(x) (MIPS_FPREG(x) && !MIPS_DOUBLEREG(x))
-// FIXME: out of date comment.
-/*
- * Note: the low register of a floating point pair is sufficient to
- * create the name of a double, but require both names to be passed to
- * allow for asserts to verify that the pair is consecutive if significant
- * rework is done in this area.  Also, it is a good reminder in the calling
- * code that reg locations always describe doubles as a pair of singles.
- */
-#define MIPS_S2D(x, y) ((x) | MIPS_FP_DOUBLE)
-// Mask to strip off fp flags.
-#define MIPS_FP_REG_MASK (MIPS_FP_REG_OFFSET-1)
 
 #define LOWORD_OFFSET 0
 #define HIWORD_OFFSET 4
@@ -159,135 +139,159 @@
 #define ENCODE_MIPS_REG_LO           (1ULL << kMipsRegLO)
 
 enum MipsNativeRegisterPool {
-  rZERO = 0,
-  rAT = 1,
-  rV0 = 2,
-  rV1 = 3,
-  rA0 = 4,
-  rA1 = 5,
-  rA2 = 6,
-  rA3 = 7,
-  rT0 = 8,
-  rT1 = 9,
-  rT2 = 10,
-  rT3 = 11,
-  rT4 = 12,
-  rT5 = 13,
-  rT6 = 14,
-  rT7 = 15,
-  rS0 = 16,
-  rS1 = 17,
-  rS2 = 18,
-  rS3 = 19,
-  rS4 = 20,
-  rS5 = 21,
-  rS6 = 22,
-  rS7 = 23,
-  rT8 = 24,
-  rT9 = 25,
-  rK0 = 26,
-  rK1 = 27,
-  rGP = 28,
-  rSP = 29,
-  rFP = 30,
-  rRA = 31,
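+  // Values embed RegStorage shape and class bits so they can be used directly as RegStorage encodings.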
+  rZERO = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  0,
+  rAT   = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  1,
+  rV0   = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  2,
+  rV1   = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  3,
+  rA0   = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  4,
+  rA1   = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  5,
+  rA2   = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  6,
+  rA3   = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  7,
+  rT0   = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  8,
+  rT1   = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  9,
+  rT2   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 10,
+  rT3   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 11,
+  rT4   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 12,
+  rT5   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 13,
+  rT6   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 14,
+  rT7   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 15,
+  rS0   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 16,
+  rS1   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 17,
+  rS2   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 18,
+  rS3   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 19,
+  rS4   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 20,
+  rS5   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 21,
+  rS6   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 22,
+  rS7   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 23,
+  rT8   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 24,
+  rT9   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 25,
+  rK0   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 26,
+  rK1   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 27,
+  rGP   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 28,
+  rSP   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 29,
+  rFP   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 30,
+  rRA   = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 31,
 
-  rF0 = 0 + MIPS_FP_REG_OFFSET,
-  rF1,
-  rF2,
-  rF3,
-  rF4,
-  rF5,
-  rF6,
-  rF7,
-  rF8,
-  rF9,
-  rF10,
-  rF11,
-  rF12,
-  rF13,
-  rF14,
-  rF15,
+  rF0  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  0,
+  rF1  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  1,
+  rF2  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  2,
+  rF3  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  3,
+  rF4  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  4,
+  rF5  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  5,
+  rF6  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  6,
+  rF7  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  7,
+  rF8  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  8,
+  rF9  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint |  9,
+  rF10 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10,
+  rF11 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 11,
+  rF12 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 12,
+  rF13 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13,
+  rF14 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14,
+  rF15 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15,
 #if 0
   /*
    * TODO: The shared resource mask doesn't have enough bit positions to describe all
    * MIPS registers.  Expand it and enable use of fp registers 16 through 31.
    */
-  rF16,
-  rF17,
-  rF18,
-  rF19,
-  rF20,
-  rF21,
-  rF22,
-  rF23,
-  rF24,
-  rF25,
-  rF26,
-  rF27,
-  rF28,
-  rF29,
-  rF30,
-  rF31,
+  rF16 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 16,
+  rF17 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 17,
+  rF18 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 18,
+  rF19 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 19,
+  rF20 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 20,
+  rF21 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 21,
+  rF22 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 22,
+  rF23 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 23,
+  rF24 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 24,
+  rF25 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 25,
+  rF26 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 26,
+  rF27 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 27,
+  rF28 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 28,
+  rF29 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 29,
+  rF30 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 30,
+  rF31 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 31,
 #endif
-  rDF0 = rF0 + MIPS_FP_DOUBLE,
-  rDF1 = rF2 + MIPS_FP_DOUBLE,
-  rDF2 = rF4 + MIPS_FP_DOUBLE,
-  rDF3 = rF6 + MIPS_FP_DOUBLE,
-  rDF4 = rF8 + MIPS_FP_DOUBLE,
-  rDF5 = rF10 + MIPS_FP_DOUBLE,
-  rDF6 = rF12 + MIPS_FP_DOUBLE,
-  rDF7 = rF14 + MIPS_FP_DOUBLE,
+  rD0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  0,
+  rD1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  1,
+  rD2  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
+  rD3  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  3,
+  rD4  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  4,
+  rD5  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  5,
+  rD6  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  6,
+  rD7  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  7,
 #if 0  // TODO: expand resource mask to enable use of all MIPS fp registers.
-  rDF8 = rF16 + MIPS_FP_DOUBLE,
-  rDF9 = rF18 + MIPS_FP_DOUBLE,
-  rDF10 = rF20 + MIPS_FP_DOUBLE,
-  rDF11 = rF22 + MIPS_FP_DOUBLE,
-  rDF12 = rF24 + MIPS_FP_DOUBLE,
-  rDF13 = rF26 + MIPS_FP_DOUBLE,
-  rDF14 = rF28 + MIPS_FP_DOUBLE,
-  rDF15 = rF30 + MIPS_FP_DOUBLE,
+  rD8  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  8,
+  rD9  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  9,
+  rD10 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
+  rD11 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11,
+  rD12 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
+  rD13 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13,
+  rD14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
+  rD15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15,
 #endif
 };
 
-const RegStorage rs_rZERO(RegStorage::k32BitSolo, rZERO);
-const RegStorage rs_rAT(RegStorage::k32BitSolo, rAT);
-const RegStorage rs_rV0(RegStorage::k32BitSolo, rV0);
-const RegStorage rs_rV1(RegStorage::k32BitSolo, rV1);
-const RegStorage rs_rA0(RegStorage::k32BitSolo, rA0);
-const RegStorage rs_rA1(RegStorage::k32BitSolo, rA1);
-const RegStorage rs_rA2(RegStorage::k32BitSolo, rA2);
-const RegStorage rs_rA3(RegStorage::k32BitSolo, rA3);
-const RegStorage rs_rT0(RegStorage::k32BitSolo, rT0);
-const RegStorage rs_rT1(RegStorage::k32BitSolo, rT1);
-const RegStorage rs_rT2(RegStorage::k32BitSolo, rT2);
-const RegStorage rs_rT3(RegStorage::k32BitSolo, rT3);
-const RegStorage rs_rT4(RegStorage::k32BitSolo, rT4);
-const RegStorage rs_rT5(RegStorage::k32BitSolo, rT5);
-const RegStorage rs_rT6(RegStorage::k32BitSolo, rT6);
-const RegStorage rs_rT7(RegStorage::k32BitSolo, rT7);
-const RegStorage rs_rS0(RegStorage::k32BitSolo, rS0);
-const RegStorage rs_rS1(RegStorage::k32BitSolo, rS1);
-const RegStorage rs_rS2(RegStorage::k32BitSolo, rS2);
-const RegStorage rs_rS3(RegStorage::k32BitSolo, rS3);
-const RegStorage rs_rS4(RegStorage::k32BitSolo, rS4);
-const RegStorage rs_rS5(RegStorage::k32BitSolo, rS5);
-const RegStorage rs_rS6(RegStorage::k32BitSolo, rS6);
-const RegStorage rs_rS7(RegStorage::k32BitSolo, rS7);
-const RegStorage rs_rT8(RegStorage::k32BitSolo, rT8);
-const RegStorage rs_rT9(RegStorage::k32BitSolo, rT9);
-const RegStorage rs_rK0(RegStorage::k32BitSolo, rK0);
-const RegStorage rs_rK1(RegStorage::k32BitSolo, rK1);
-const RegStorage rs_rGP(RegStorage::k32BitSolo, rGP);
-const RegStorage rs_rSP(RegStorage::k32BitSolo, rSP);
-const RegStorage rs_rFP(RegStorage::k32BitSolo, rFP);
-const RegStorage rs_rRA(RegStorage::k32BitSolo, rRA);
-const RegStorage rs_rF12(RegStorage::k32BitSolo, rF12);
-const RegStorage rs_rF13(RegStorage::k32BitSolo, rF13);
-const RegStorage rs_rF14(RegStorage::k32BitSolo, rF14);
-const RegStorage rs_rF15(RegStorage::k32BitSolo, rF15);
-const RegStorage rs_rF0(RegStorage::k32BitSolo, rF0);
-const RegStorage rs_rF1(RegStorage::k32BitSolo, rF1);
+constexpr RegStorage rs_rZERO(RegStorage::kValid | rZERO);
+constexpr RegStorage rs_rAT(RegStorage::kValid | rAT);
+constexpr RegStorage rs_rV0(RegStorage::kValid | rV0);
+constexpr RegStorage rs_rV1(RegStorage::kValid | rV1);
+constexpr RegStorage rs_rA0(RegStorage::kValid | rA0);
+constexpr RegStorage rs_rA1(RegStorage::kValid | rA1);
+constexpr RegStorage rs_rA2(RegStorage::kValid | rA2);
+constexpr RegStorage rs_rA3(RegStorage::kValid | rA3);
+constexpr RegStorage rs_rT0(RegStorage::kValid | rT0);
+constexpr RegStorage rs_rT1(RegStorage::kValid | rT1);
+constexpr RegStorage rs_rT2(RegStorage::kValid | rT2);
+constexpr RegStorage rs_rT3(RegStorage::kValid | rT3);
+constexpr RegStorage rs_rT4(RegStorage::kValid | rT4);
+constexpr RegStorage rs_rT5(RegStorage::kValid | rT5);
+constexpr RegStorage rs_rT6(RegStorage::kValid | rT6);
+constexpr RegStorage rs_rT7(RegStorage::kValid | rT7);
+constexpr RegStorage rs_rS0(RegStorage::kValid | rS0);
+constexpr RegStorage rs_rS1(RegStorage::kValid | rS1);
+constexpr RegStorage rs_rS2(RegStorage::kValid | rS2);
+constexpr RegStorage rs_rS3(RegStorage::kValid | rS3);
+constexpr RegStorage rs_rS4(RegStorage::kValid | rS4);
+constexpr RegStorage rs_rS5(RegStorage::kValid | rS5);
+constexpr RegStorage rs_rS6(RegStorage::kValid | rS6);
+constexpr RegStorage rs_rS7(RegStorage::kValid | rS7);
+constexpr RegStorage rs_rT8(RegStorage::kValid | rT8);
+constexpr RegStorage rs_rT9(RegStorage::kValid | rT9);
+constexpr RegStorage rs_rK0(RegStorage::kValid | rK0);
+constexpr RegStorage rs_rK1(RegStorage::kValid | rK1);
+constexpr RegStorage rs_rGP(RegStorage::kValid | rGP);
+constexpr RegStorage rs_rSP(RegStorage::kValid | rSP);
+constexpr RegStorage rs_rFP(RegStorage::kValid | rFP);
+constexpr RegStorage rs_rRA(RegStorage::kValid | rRA);
+
+constexpr RegStorage rs_rMIPS_LR(RegStorage::kInvalid);     // Not used for MIPS.
+constexpr RegStorage rs_rMIPS_PC(RegStorage::kInvalid);     // Not used for MIPS.
+constexpr RegStorage rs_rMIPS_COUNT(RegStorage::kInvalid);  // Not used for MIPS.
+
+constexpr RegStorage rs_rF0(RegStorage::kValid | rF0);
+constexpr RegStorage rs_rF1(RegStorage::kValid | rF1);
+constexpr RegStorage rs_rF2(RegStorage::kValid | rF2);
+constexpr RegStorage rs_rF3(RegStorage::kValid | rF3);
+constexpr RegStorage rs_rF4(RegStorage::kValid | rF4);
+constexpr RegStorage rs_rF5(RegStorage::kValid | rF5);
+constexpr RegStorage rs_rF6(RegStorage::kValid | rF6);
+constexpr RegStorage rs_rF7(RegStorage::kValid | rF7);
+constexpr RegStorage rs_rF8(RegStorage::kValid | rF8);
+constexpr RegStorage rs_rF9(RegStorage::kValid | rF9);
+constexpr RegStorage rs_rF10(RegStorage::kValid | rF10);
+constexpr RegStorage rs_rF11(RegStorage::kValid | rF11);
+constexpr RegStorage rs_rF12(RegStorage::kValid | rF12);
+constexpr RegStorage rs_rF13(RegStorage::kValid | rF13);
+constexpr RegStorage rs_rF14(RegStorage::kValid | rF14);
+constexpr RegStorage rs_rF15(RegStorage::kValid | rF15);
+
+constexpr RegStorage rs_rD0(RegStorage::kValid | rD0);
+constexpr RegStorage rs_rD1(RegStorage::kValid | rD1);
+constexpr RegStorage rs_rD2(RegStorage::kValid | rD2);
+constexpr RegStorage rs_rD3(RegStorage::kValid | rD3);
+constexpr RegStorage rs_rD4(RegStorage::kValid | rD4);
+constexpr RegStorage rs_rD5(RegStorage::kValid | rD5);
+constexpr RegStorage rs_rD6(RegStorage::kValid | rD6);
+constexpr RegStorage rs_rD7(RegStorage::kValid | rD7);
 
 // TODO: reduce/eliminate use of these.
 #define rMIPS_SUSPEND rS0
@@ -311,9 +315,9 @@
 #define rMIPS_FARG2 rFARG2
 #define rs_rMIPS_FARG2 rs_rFARG2
 #define rMIPS_FARG3 rFARG3
-#define rs_MIPS_FARG3 rs_rFARG3
+#define rs_rMIPS_FARG3 rs_rFARG3
 #define rMIPS_RET0 rRESULT0
-#define rs_MIPS_RET0 rs_rRESULT0
+#define rs_rMIPS_RET0 rs_rRESULT0
 #define rMIPS_RET1 rRESULT1
 #define rs_rMIPS_RET1 rs_rRESULT1
 #define rMIPS_INVOKE_TGT rT9
@@ -322,16 +326,17 @@
 
 // RegisterLocation templates return values (r_V0, or r_V0/r_V1).
 const RegLocation mips_loc_c_return
-    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
+    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1,
      RegStorage(RegStorage::k32BitSolo, rV0), INVALID_SREG, INVALID_SREG};
 const RegLocation mips_loc_c_return_wide
-    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
+    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
      RegStorage(RegStorage::k64BitPair, rV0, rV1), INVALID_SREG, INVALID_SREG};
 const RegLocation mips_loc_c_return_float
-    {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, kVectorNotUsed,
+    {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1,
      RegStorage(RegStorage::k32BitSolo, rF0), INVALID_SREG, INVALID_SREG};
+// FIXME: move MIPS to k64BitSolo for doubles
 const RegLocation mips_loc_c_return_double
-    {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, kVectorNotUsed,
+    {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1,
      RegStorage(RegStorage::k64BitPair, rF0, rF1), INVALID_SREG, INVALID_SREG};
 
 enum MipsShiftEncodings {
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 7f4cd5e..55cf434 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -26,18 +26,41 @@
 
 namespace art {
 
-static int core_regs[] = {rZERO, rAT, rV0, rV1, rA0, rA1, rA2, rA3,
-                          rT0, rT1, rT2, rT3, rT4, rT5, rT6, rT7,
-                          rS0, rS1, rS2, rS3, rS4, rS5, rS6, rS7, rT8,
-                          rT9, rK0, rK1, rGP, rSP, rFP, rRA};
-static int ReservedRegs[] = {rZERO, rAT, rS0, rS1, rK0, rK1, rGP, rSP,
-                             rRA};
-static int core_temps[] = {rV0, rV1, rA0, rA1, rA2, rA3, rT0, rT1, rT2,
-                           rT3, rT4, rT5, rT6, rT7, rT8};
-static int FpRegs[] = {rF0, rF1, rF2, rF3, rF4, rF5, rF6, rF7,
-                       rF8, rF9, rF10, rF11, rF12, rF13, rF14, rF15};
-static int fp_temps[] = {rF0, rF1, rF2, rF3, rF4, rF5, rF6, rF7,
-                         rF8, rF9, rF10, rF11, rF12, rF13, rF14, rF15};
+static const RegStorage core_regs_arr[] =
+    {rs_rZERO, rs_rAT, rs_rV0, rs_rV1, rs_rA0, rs_rA1, rs_rA2, rs_rA3, rs_rT0, rs_rT1, rs_rT2,
+     rs_rT3, rs_rT4, rs_rT5, rs_rT6, rs_rT7, rs_rS0, rs_rS1, rs_rS2, rs_rS3, rs_rS4, rs_rS5,
+     rs_rS6, rs_rS7, rs_rT8, rs_rT9, rs_rK0, rs_rK1, rs_rGP, rs_rSP, rs_rFP, rs_rRA};
+static RegStorage sp_regs_arr[] =
+    {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10,
+     rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15};
+static RegStorage dp_regs_arr[] =
+    {rs_rD0, rs_rD1, rs_rD2, rs_rD3, rs_rD4, rs_rD5, rs_rD6, rs_rD7};
+static const RegStorage reserved_regs_arr[] =
+    {rs_rZERO, rs_rAT, rs_rS0, rs_rS1, rs_rK0, rs_rK1, rs_rGP, rs_rSP, rs_rRA};
+static RegStorage core_temps_arr[] =
+    {rs_rV0, rs_rV1, rs_rA0, rs_rA1, rs_rA2, rs_rA3, rs_rT0, rs_rT1, rs_rT2, rs_rT3, rs_rT4,
+     rs_rT5, rs_rT6, rs_rT7, rs_rT8};
+static RegStorage sp_temps_arr[] =
+    {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10,
+     rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15};
+static RegStorage dp_temps_arr[] =
+    {rs_rD0, rs_rD1, rs_rD2, rs_rD3, rs_rD4, rs_rD5, rs_rD6, rs_rD7};
+
+static const std::vector<RegStorage> empty_pool;
+static const std::vector<RegStorage> core_regs(core_regs_arr,
+    core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0]));
+static const std::vector<RegStorage> sp_regs(sp_regs_arr,
+    sp_regs_arr + sizeof(sp_regs_arr) / sizeof(sp_regs_arr[0]));
+static const std::vector<RegStorage> dp_regs(dp_regs_arr,
+    dp_regs_arr + sizeof(dp_regs_arr) / sizeof(dp_regs_arr[0]));
+static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
+    reserved_regs_arr + sizeof(reserved_regs_arr) / sizeof(reserved_regs_arr[0]));
+static const std::vector<RegStorage> core_temps(core_temps_arr,
+    core_temps_arr + sizeof(core_temps_arr) / sizeof(core_temps_arr[0]));
+static const std::vector<RegStorage> sp_temps(sp_temps_arr,
+    sp_temps_arr + sizeof(sp_temps_arr) / sizeof(sp_temps_arr[0]));
+static const std::vector<RegStorage> dp_temps(dp_temps_arr,
+    dp_temps_arr + sizeof(dp_temps_arr) / sizeof(dp_temps_arr[0]));
 
 RegLocation MipsMir2Lir::LocCReturn() {
   return mips_loc_c_return;
@@ -57,29 +80,29 @@
 
 // Return a target-dependent special register.
 RegStorage MipsMir2Lir::TargetReg(SpecialTargetRegister reg) {
-  int res_reg = RegStorage::kInvalidRegVal;
+  RegStorage res_reg;
   switch (reg) {
-    case kSelf: res_reg = rMIPS_SELF; break;
-    case kSuspend: res_reg =  rMIPS_SUSPEND; break;
-    case kLr: res_reg =  rMIPS_LR; break;
-    case kPc: res_reg =  rMIPS_PC; break;
-    case kSp: res_reg =  rMIPS_SP; break;
-    case kArg0: res_reg = rMIPS_ARG0; break;
-    case kArg1: res_reg = rMIPS_ARG1; break;
-    case kArg2: res_reg = rMIPS_ARG2; break;
-    case kArg3: res_reg = rMIPS_ARG3; break;
-    case kFArg0: res_reg = rMIPS_FARG0; break;
-    case kFArg1: res_reg = rMIPS_FARG1; break;
-    case kFArg2: res_reg = rMIPS_FARG2; break;
-    case kFArg3: res_reg = rMIPS_FARG3; break;
-    case kRet0: res_reg = rMIPS_RET0; break;
-    case kRet1: res_reg = rMIPS_RET1; break;
-    case kInvokeTgt: res_reg = rMIPS_INVOKE_TGT; break;
-    case kHiddenArg: res_reg = rT0; break;
-    case kHiddenFpArg: res_reg = RegStorage::kInvalidRegVal; break;
-    case kCount: res_reg = rMIPS_COUNT; break;
+    case kSelf: res_reg = rs_rMIPS_SELF; break;
+    case kSuspend: res_reg =  rs_rMIPS_SUSPEND; break;
+    case kLr: res_reg =  rs_rMIPS_LR; break;
+    case kPc: res_reg =  rs_rMIPS_PC; break;
+    case kSp: res_reg =  rs_rMIPS_SP; break;
+    case kArg0: res_reg = rs_rMIPS_ARG0; break;
+    case kArg1: res_reg = rs_rMIPS_ARG1; break;
+    case kArg2: res_reg = rs_rMIPS_ARG2; break;
+    case kArg3: res_reg = rs_rMIPS_ARG3; break;
+    case kFArg0: res_reg = rs_rMIPS_FARG0; break;
+    case kFArg1: res_reg = rs_rMIPS_FARG1; break;
+    case kFArg2: res_reg = rs_rMIPS_FARG2; break;
+    case kFArg3: res_reg = rs_rMIPS_FARG3; break;
+    case kRet0: res_reg = rs_rMIPS_RET0; break;
+    case kRet1: res_reg = rs_rMIPS_RET1; break;
+    case kInvokeTgt: res_reg = rs_rMIPS_INVOKE_TGT; break;
+    case kHiddenArg: res_reg = rs_rT0; break;
+    case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
+    case kCount: res_reg = rs_rMIPS_COUNT; break;
   }
-  return RegStorage::Solo32(res_reg);
+  return res_reg;
 }
 
 RegStorage MipsMir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
@@ -96,35 +119,22 @@
   }
 }
 
-// Create a double from a pair of singles.
-int MipsMir2Lir::S2d(int low_reg, int high_reg) {
-  return MIPS_S2D(low_reg, high_reg);
-}
-
-// Return mask to strip off fp reg flags and bias.
-uint32_t MipsMir2Lir::FpRegMask() {
-  return MIPS_FP_REG_MASK;
-}
-
-// True if both regs single, both core or both double.
-bool MipsMir2Lir::SameRegType(int reg1, int reg2) {
-  return (MIPS_REGTYPE(reg1) == MIPS_REGTYPE(reg2));
-}
-
 /*
  * Decode the register id.
  */
-uint64_t MipsMir2Lir::GetRegMaskCommon(int reg) {
+uint64_t MipsMir2Lir::GetRegMaskCommon(RegStorage reg) {
   uint64_t seed;
   int shift;
-  int reg_id;
-
-
-  reg_id = reg & 0x1f;
+  int reg_id = reg.GetRegNum();
   /* Each double register is equal to a pair of single-precision FP registers */
-  seed = MIPS_DOUBLEREG(reg) ? 3 : 1;
-  /* FP register starts at bit position 16 */
-  shift = MIPS_FPREG(reg) ? kMipsFPReg0 : 0;
+  if (reg.IsDouble()) {
+    seed = 0x3;
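+    // Doubles are numbered 0..7; shift to the index of the low overlapping single.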
+    reg_id = reg_id << 1;
+  } else {
+    seed = 1;
+  }
+  /* FP register starts at bit position 32 */
+  shift = reg.IsFloat() ? kMipsFPReg0 : 0;
   /* Expand the double register id into single offset */
   shift += reg_id;
   return (seed << shift);
@@ -209,11 +219,11 @@
              }
              break;
            case 's':
-             snprintf(tbuf, arraysize(tbuf), "$f%d", operand & MIPS_FP_REG_MASK);
+             snprintf(tbuf, arraysize(tbuf), "$f%d", RegStorage::RegNum(operand));
              break;
            case 'S':
-             DCHECK_EQ(((operand & MIPS_FP_REG_MASK) & 1), 0);
-             snprintf(tbuf, arraysize(tbuf), "$f%d", operand & MIPS_FP_REG_MASK);
+             DCHECK_EQ(RegStorage::RegNum(operand) & 1, 0);
+             snprintf(tbuf, arraysize(tbuf), "$f%d", RegStorage::RegNum(operand));
              break;
            case 'h':
              snprintf(tbuf, arraysize(tbuf), "%04x", operand);
@@ -327,7 +337,7 @@
  */
 
 void MipsMir2Lir::AdjustSpillMask() {
-  core_spill_mask_ |= (1 << rRA);
+  core_spill_mask_ |= (1 << rs_rRA.GetRegNum());
   num_core_spills_++;
 }
 
@@ -337,92 +347,63 @@
  * include any holes in the mask.  Associate holes with
  * Dalvik register INVALID_VREG (0xFFFFU).
  */
-void MipsMir2Lir::MarkPreservedSingle(int s_reg, int reg) {
+void MipsMir2Lir::MarkPreservedSingle(int s_reg, RegStorage reg) {
   LOG(FATAL) << "No support yet for promoted FP regs";
 }
 
-void MipsMir2Lir::FlushRegWide(RegStorage reg) {
-  RegisterInfo* info1 = GetRegInfo(reg.GetLowReg());
-  RegisterInfo* info2 = GetRegInfo(reg.GetHighReg());
-  DCHECK(info1 && info2 && info1->pair && info2->pair &&
-         (info1->partner == info2->reg) &&
-         (info2->partner == info1->reg));
-  if ((info1->live && info1->dirty) || (info2->live && info2->dirty)) {
-    if (!(info1->is_temp && info2->is_temp)) {
-      /* Should not happen.  If it does, there's a problem in eval_loc */
-      LOG(FATAL) << "Long half-temp, half-promoted";
-    }
-
-    info1->dirty = false;
-    info2->dirty = false;
-    if (mir_graph_->SRegToVReg(info2->s_reg) < mir_graph_->SRegToVReg(info1->s_reg))
-      info1 = info2;
-    int v_reg = mir_graph_->SRegToVReg(info1->s_reg);
-    StoreBaseDispWide(rs_rMIPS_SP, VRegOffset(v_reg),
-                      RegStorage(RegStorage::k64BitPair, info1->reg, info1->partner));
-  }
-}
-
-void MipsMir2Lir::FlushReg(RegStorage reg) {
-  DCHECK(!reg.IsPair());
-  RegisterInfo* info = GetRegInfo(reg.GetReg());
-  if (info->live && info->dirty) {
-    info->dirty = false;
-    int v_reg = mir_graph_->SRegToVReg(info->s_reg);
-    Store32Disp(rs_rMIPS_SP, VRegOffset(v_reg), reg);
-  }
-}
-
-/* Give access to the target-dependent FP register encoding to common code */
-bool MipsMir2Lir::IsFpReg(int reg) {
-  return MIPS_FPREG(reg);
-}
-
-bool MipsMir2Lir::IsFpReg(RegStorage reg) {
-  return IsFpReg(reg.IsPair() ? reg.GetLowReg() : reg.GetReg());
+void MipsMir2Lir::MarkPreservedDouble(int s_reg, RegStorage reg) {
+  LOG(FATAL) << "No support yet for promoted FP regs";
 }
 
 /* Clobber all regs that might be used by an external C call */
 void MipsMir2Lir::ClobberCallerSave() {
-  Clobber(rZERO);
-  Clobber(rAT);
-  Clobber(rV0);
-  Clobber(rV1);
-  Clobber(rA0);
-  Clobber(rA1);
-  Clobber(rA2);
-  Clobber(rA3);
-  Clobber(rT0);
-  Clobber(rT1);
-  Clobber(rT2);
-  Clobber(rT3);
-  Clobber(rT4);
-  Clobber(rT5);
-  Clobber(rT6);
-  Clobber(rT7);
-  Clobber(rT8);
-  Clobber(rT9);
-  Clobber(rK0);
-  Clobber(rK1);
-  Clobber(rGP);
-  Clobber(rFP);
-  Clobber(rRA);
-  Clobber(rF0);
-  Clobber(rF1);
-  Clobber(rF2);
-  Clobber(rF3);
-  Clobber(rF4);
-  Clobber(rF5);
-  Clobber(rF6);
-  Clobber(rF7);
-  Clobber(rF8);
-  Clobber(rF9);
-  Clobber(rF10);
-  Clobber(rF11);
-  Clobber(rF12);
-  Clobber(rF13);
-  Clobber(rF14);
-  Clobber(rF15);
+  Clobber(rs_rZERO);
+  Clobber(rs_rAT);
+  Clobber(rs_rV0);
+  Clobber(rs_rV1);
+  Clobber(rs_rA0);
+  Clobber(rs_rA1);
+  Clobber(rs_rA2);
+  Clobber(rs_rA3);
+  Clobber(rs_rT0);
+  Clobber(rs_rT1);
+  Clobber(rs_rT2);
+  Clobber(rs_rT3);
+  Clobber(rs_rT4);
+  Clobber(rs_rT5);
+  Clobber(rs_rT6);
+  Clobber(rs_rT7);
+  Clobber(rs_rT8);
+  Clobber(rs_rT9);
+  Clobber(rs_rK0);
+  Clobber(rs_rK1);
+  Clobber(rs_rGP);
+  Clobber(rs_rFP);
+  Clobber(rs_rRA);
+  Clobber(rs_rF0);
+  Clobber(rs_rF1);
+  Clobber(rs_rF2);
+  Clobber(rs_rF3);
+  Clobber(rs_rF4);
+  Clobber(rs_rF5);
+  Clobber(rs_rF6);
+  Clobber(rs_rF7);
+  Clobber(rs_rF8);
+  Clobber(rs_rF9);
+  Clobber(rs_rF10);
+  Clobber(rs_rF11);
+  Clobber(rs_rF12);
+  Clobber(rs_rF13);
+  Clobber(rs_rF14);
+  Clobber(rs_rF15);
+  Clobber(rs_rD0);
+  Clobber(rs_rD1);
+  Clobber(rs_rD2);
+  Clobber(rs_rD3);
+  Clobber(rs_rD4);
+  Clobber(rs_rD5);
+  Clobber(rs_rD6);
+  Clobber(rs_rD7);
 }
 
 RegLocation MipsMir2Lir::GetReturnWideAlt() {
@@ -439,91 +420,63 @@
 
 /* To be used when explicitly managing register use */
 void MipsMir2Lir::LockCallTemps() {
-  LockTemp(rMIPS_ARG0);
-  LockTemp(rMIPS_ARG1);
-  LockTemp(rMIPS_ARG2);
-  LockTemp(rMIPS_ARG3);
+  LockTemp(rs_rMIPS_ARG0);
+  LockTemp(rs_rMIPS_ARG1);
+  LockTemp(rs_rMIPS_ARG2);
+  LockTemp(rs_rMIPS_ARG3);
 }
 
 /* To be used when explicitly managing register use */
 void MipsMir2Lir::FreeCallTemps() {
-  FreeTemp(rMIPS_ARG0);
-  FreeTemp(rMIPS_ARG1);
-  FreeTemp(rMIPS_ARG2);
-  FreeTemp(rMIPS_ARG3);
+  FreeTemp(rs_rMIPS_ARG0);
+  FreeTemp(rs_rMIPS_ARG1);
+  FreeTemp(rs_rMIPS_ARG2);
+  FreeTemp(rs_rMIPS_ARG3);
 }
 
-void MipsMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
+bool MipsMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
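+  // Returns true only if a barrier instruction was actually emitted (SMP builds).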
 #if ANDROID_SMP != 0
   NewLIR1(kMipsSync, 0 /* Only stype currently supported */);
+  return true;
+#else
+  return false;
 #endif
 }
 
-// Alloc a pair of core registers, or a double.
-RegStorage MipsMir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
-  int high_reg;
-  int low_reg;
-
-  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
-    return AllocTempDouble();
-  }
-
-  low_reg = AllocTemp().GetReg();
-  high_reg = AllocTemp().GetReg();
-  return RegStorage(RegStorage::k64BitPair, low_reg, high_reg);
-}
-
-RegStorage MipsMir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
-  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
-    return AllocTempFloat();
-}
-  return AllocTemp();
-}
-
 void MipsMir2Lir::CompilerInitializeRegAlloc() {
-  int num_regs = sizeof(core_regs)/sizeof(*core_regs);
-  int num_reserved = sizeof(ReservedRegs)/sizeof(*ReservedRegs);
-  int num_temps = sizeof(core_temps)/sizeof(*core_temps);
-  int num_fp_regs = sizeof(FpRegs)/sizeof(*FpRegs);
-  int num_fp_temps = sizeof(fp_temps)/sizeof(*fp_temps);
-  reg_pool_ = static_cast<RegisterPool*>(arena_->Alloc(sizeof(*reg_pool_),
-                                                       kArenaAllocRegAlloc));
-  reg_pool_->num_core_regs = num_regs;
-  reg_pool_->core_regs = static_cast<RegisterInfo*>
-     (arena_->Alloc(num_regs * sizeof(*reg_pool_->core_regs), kArenaAllocRegAlloc));
-  reg_pool_->num_fp_regs = num_fp_regs;
-  reg_pool_->FPRegs = static_cast<RegisterInfo*>
-      (arena_->Alloc(num_fp_regs * sizeof(*reg_pool_->FPRegs), kArenaAllocRegAlloc));
-  CompilerInitPool(reg_pool_->core_regs, core_regs, reg_pool_->num_core_regs);
-  CompilerInitPool(reg_pool_->FPRegs, FpRegs, reg_pool_->num_fp_regs);
-  // Keep special registers from being allocated
-  for (int i = 0; i < num_reserved; i++) {
-    if (NO_SUSPEND && (ReservedRegs[i] == rMIPS_SUSPEND)) {
-      // To measure cost of suspend check
-      continue;
+  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, empty_pool /* core64 */, sp_regs,
+                                        dp_regs, reserved_regs, empty_pool /* reserved64 */,
+                                        core_temps, empty_pool /* core64_temps */, sp_temps,
+                                        dp_temps);
+
+  // Target-specific adjustments.
+
+  // Alias single precision floats to the appropriate half of the overlapping double.
+  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
+  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
+    int sp_reg_num = info->GetReg().GetRegNum();
+    int dp_reg_num = sp_reg_num >> 1;
+    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num);
+    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
+    // Double precision register's master storage should refer to itself.
+    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
+    // Redirect the single's master storage to that of the backing double.
+    info->SetMaster(dp_reg_info);
+    // Singles should show a single 32-bit mask bit, at first referring to the low half.
+    DCHECK_EQ(info->StorageMask(), 0x1U);
+    if (sp_reg_num & 1) {
+      // For odd singles, change to use the high word of the backing double.
+      info->SetStorageMask(0x2);
     }
-    MarkInUse(ReservedRegs[i]);
   }
-  // Mark temp regs - all others not in use can be used for promotion
-  for (int i = 0; i < num_temps; i++) {
-    MarkTemp(core_temps[i]);
-  }
-  for (int i = 0; i < num_fp_temps; i++) {
-    MarkTemp(fp_temps[i]);
-  }
+
+  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
+  // TODO: adjust when we roll to hard float calling convention.
+  reg_pool_->next_core_reg_ = 2;
+  reg_pool_->next_sp_reg_ = 2;
+  reg_pool_->next_dp_reg_ = 1;
 }
 
-void MipsMir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
-  DCHECK(rl_keep.wide);
-  DCHECK(rl_free.wide);
-  if ((rl_free.reg.GetLowReg() != rl_keep.reg.GetLowReg()) &&
-      (rl_free.reg.GetLowReg() != rl_keep.reg.GetHighReg()) &&
-      (rl_free.reg.GetHighReg() != rl_keep.reg.GetLowReg()) &&
-      (rl_free.reg.GetHighReg() != rl_keep.reg.GetHighReg())) {
-    // No overlap, free.
-    FreeTemp(rl_free.reg);
-  }
-}
 /*
  * In the Arm code a it is typical to use the link register
  * to hold the target address.  However, for Mips we must
@@ -536,6 +489,11 @@
   return rs_rT9;
 }
 
+RegStorage MipsMir2Lir::LoadHelper(ThreadOffset<8> offset) {
+  UNIMPLEMENTED(FATAL) << "Should not be called.";
+  return RegStorage::InvalidReg();
+}
+
 LIR* MipsMir2Lir::CheckSuspendUsingLoad() {
   RegStorage tmp = AllocTemp();
   // NOTE: native pointer.
@@ -579,6 +537,18 @@
   return (lir->opcode == kMipsB);
 }
 
+bool MipsMir2Lir::SupportsVolatileLoadStore(OpSize size) {
+  // No support for 64-bit atomic load/store on mips.
+  return size != k64 && size != kDouble;
+}
+
+RegisterClass MipsMir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
+  // No support for 64-bit atomic load/store on mips.
+  DCHECK(size != k64 && size != kDouble);
+  // TODO: Verify that both core and fp registers are suitable for smaller sizes.
+  return RegClassBySize(size);
+}
+
 MipsMir2Lir::MipsMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
     : Mir2Lir(cu, mir_graph, arena) {
   for (int i = 0; i < kMipsLast; i++) {
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index a865430..2757b7b 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -24,12 +24,12 @@
 LIR* MipsMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
   int opcode;
   /* must be both DOUBLE or both not DOUBLE */
-  DCHECK_EQ(MIPS_DOUBLEREG(r_dest.GetReg()), MIPS_DOUBLEREG(r_src.GetReg()));
-  if (MIPS_DOUBLEREG(r_dest.GetReg())) {
+  DCHECK_EQ(r_dest.IsDouble(), r_src.IsDouble());
+  if (r_dest.IsDouble()) {
     opcode = kMipsFmovd;
   } else {
-    if (MIPS_SINGLEREG(r_dest.GetReg())) {
-      if (MIPS_SINGLEREG(r_src.GetReg())) {
+    if (r_dest.IsSingle()) {
+      if (r_src.IsSingle()) {
         opcode = kMipsFmovs;
       } else {
         /* note the operands are swapped for the mtc1 instr */
@@ -39,7 +39,7 @@
         opcode = kMipsMtc1;
       }
     } else {
-      DCHECK(MIPS_SINGLEREG(r_src.GetReg()));
+      DCHECK(r_src.IsSingle());
       opcode = kMipsMfc1;
     }
   }
@@ -79,9 +79,9 @@
   LIR *res;
 
   RegStorage r_dest_save = r_dest;
-  int is_fp_reg = MIPS_FPREG(r_dest.GetReg());
+  int is_fp_reg = r_dest.IsFloat();
   if (is_fp_reg) {
-    DCHECK(MIPS_SINGLEREG(r_dest.GetReg()));
+    DCHECK(r_dest.IsSingle());
     r_dest = AllocTemp();
   }
 
@@ -355,8 +355,8 @@
   MipsOpCode opcode = kMipsNop;
   RegStorage t_reg = AllocTemp();
 
-  if (MIPS_FPREG(r_dest.GetReg())) {
-    DCHECK(MIPS_SINGLEREG(r_dest.GetReg()));
+  if (r_dest.IsFloat()) {
+    DCHECK(r_dest.IsSingle());
     DCHECK((size == k32) || (size == kSingle) || (size == kReference));
     size = kSingle;
   } else {
@@ -407,8 +407,8 @@
   MipsOpCode opcode = kMipsNop;
   RegStorage t_reg = AllocTemp();
 
-  if (MIPS_FPREG(r_src.GetReg())) {
-    DCHECK(MIPS_SINGLEREG(r_src.GetReg()));
+  if (r_src.IsFloat()) {
+    DCHECK(r_src.IsSingle());
     DCHECK((size == k32) || (size == kSingle) || (size == kReference));
     size = kSingle;
   } else {
@@ -448,7 +448,7 @@
 
 // FIXME: don't split r_dest into 2 containers.
 LIR* MipsMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
-                                   RegStorage r_dest_hi, OpSize size, int s_reg) {
+                                   RegStorage r_dest_hi, OpSize size) {
 /*
  * Load value from base + displacement.  Optionally perform null check
  * on base (which must have an associated s_reg and MIR).  If not
@@ -469,16 +469,16 @@
     case kDouble:
       pair = true;
       opcode = kMipsLw;
-      if (MIPS_FPREG(r_dest.GetReg())) {
+      if (r_dest.IsFloat()) {
         opcode = kMipsFlwc1;
-        if (MIPS_DOUBLEREG(r_dest.GetReg())) {
-          // TODO: rework to use k64BitSolo
-          r_dest.SetReg(r_dest.GetReg() - MIPS_FP_DOUBLE);
+        if (r_dest.IsDouble()) {
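+          // Expand the double into its two overlapping singles so each half is loaded with flwc1.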
+          int reg_num = (r_dest.GetRegNum() << 1) | RegStorage::kFloatingPoint;
+          r_dest = RegStorage(RegStorage::k64BitSolo, reg_num, reg_num + 1);
         } else {
-          DCHECK(MIPS_FPREG(r_dest_hi.GetReg()));
+          DCHECK(r_dest_hi.IsFloat());
           DCHECK_EQ(r_dest.GetReg(), r_dest_hi.GetReg() - 1);
+          r_dest_hi.SetReg(r_dest.GetReg() + 1);
         }
-        r_dest_hi.SetReg(r_dest.GetReg() + 1);
       }
       short_form = IS_SIMM16_2WORD(displacement);
       DCHECK_EQ((displacement & 0x3), 0);
@@ -487,9 +487,9 @@
     case kSingle:
     case kReference:
       opcode = kMipsLw;
-      if (MIPS_FPREG(r_dest.GetReg())) {
+      if (r_dest.IsFloat()) {
         opcode = kMipsFlwc1;
-        DCHECK(MIPS_SINGLEREG(r_dest.GetReg()));
+        DCHECK(r_dest.IsSingle());
       }
       DCHECK_EQ((displacement & 0x3), 0);
       break;
@@ -545,21 +545,26 @@
   return load;
 }
 
+LIR* MipsMir2Lir::LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest,
+                                       OpSize size) {
+  DCHECK(size != k64 && size != kDouble);
+  return LoadBaseDisp(r_base, displacement, r_dest, size);
+}
+
 LIR* MipsMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
-                               OpSize size, int s_reg) {
+                               OpSize size) {
   // TODO: base this on target.
   if (size == kWord) {
     size = k32;
   }
-  return LoadBaseDispBody(r_base, displacement, r_dest, RegStorage::InvalidReg(), size,
-                          s_reg);
+  if (size == k64 || size == kDouble) {
+    return LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), r_dest.GetHigh(), size);
+  } else {
+    return LoadBaseDispBody(r_base, displacement, r_dest, RegStorage::InvalidReg(), size);
+  }
 }
 
-LIR* MipsMir2Lir::LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest,
-                                   int s_reg) {
-  return LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), r_dest.GetHigh(), k64, s_reg);
-}
-
+// FIXME: don't split r_src into 2 containers.
 LIR* MipsMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement,
                                     RegStorage r_src, RegStorage r_src_hi, OpSize size) {
   LIR *res;
@@ -567,22 +572,22 @@
   LIR *store2 = NULL;
   MipsOpCode opcode = kMipsNop;
   bool short_form = IS_SIMM16(displacement);
-  bool pair = false;
+  bool pair = r_src.IsPair();
 
   switch (size) {
     case k64:
     case kDouble:
-      pair = true;
       opcode = kMipsSw;
-      if (MIPS_FPREG(r_src.GetReg())) {
+      if (r_src.IsFloat()) {
         opcode = kMipsFswc1;
-        if (MIPS_DOUBLEREG(r_src.GetReg())) {
-          r_src.SetReg(r_src.GetReg() - MIPS_FP_DOUBLE);
+        if (r_src.IsDouble()) {
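+          // Expand the double into its two overlapping single-precision registers for the store.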
+          int reg_num = (r_src.GetRegNum() << 1) | RegStorage::kFloatingPoint;
+          r_src = RegStorage(RegStorage::k64BitPair, reg_num, reg_num + 1);
         } else {
-          DCHECK(MIPS_FPREG(r_src_hi.GetReg()));
+          DCHECK(r_src_hi.IsFloat());
           DCHECK_EQ(r_src.GetReg(), (r_src_hi.GetReg() - 1));
+          r_src_hi.SetReg(r_src.GetReg() + 1);
         }
-        r_src_hi.SetReg(r_src.GetReg() + 1);
       }
       short_form = IS_SIMM16_2WORD(displacement);
       DCHECK_EQ((displacement & 0x3), 0);
@@ -591,9 +596,9 @@
     case kSingle:
     case kReference:
       opcode = kMipsSw;
-      if (MIPS_FPREG(r_src.GetReg())) {
+      if (r_src.IsFloat()) {
         opcode = kMipsFswc1;
-        DCHECK(MIPS_SINGLEREG(r_src.GetReg()));
+        DCHECK(r_src.IsSingle());
       }
       DCHECK_EQ((displacement & 0x3), 0);
       break;
@@ -641,17 +646,23 @@
   return res;
 }
 
+LIR* MipsMir2Lir::StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src,
+                                        OpSize size) {
+  DCHECK(size != k64 && size != kDouble);
+  return StoreBaseDisp(r_base, displacement, r_src, size);
+}
+
 LIR* MipsMir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                 OpSize size) {
   // TODO: base this on target.
   if (size == kWord) {
     size = k32;
   }
-  return StoreBaseDispBody(r_base, displacement, r_src, RegStorage::InvalidReg(), size);
-}
-
-LIR* MipsMir2Lir::StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src) {
-  return StoreBaseDispBody(r_base, displacement, r_src.GetLow(), r_src.GetHigh(), k64);
+  if (size == k64 || size == kDouble) {
+    return StoreBaseDispBody(r_base, displacement, r_src.GetLow(), r_src.GetHigh(), size);
+  } else {
+    return StoreBaseDispBody(r_base, displacement, r_src, RegStorage::InvalidReg(), size);
+  }
 }
 
 LIR* MipsMir2Lir::OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) {
@@ -659,14 +670,18 @@
   return NULL;
 }
 
+LIR* MipsMir2Lir::OpThreadMem(OpKind op, ThreadOffset<8> thread_offset) {
+  UNIMPLEMENTED(FATAL) << "Should not be called.";
+  return nullptr;
+}
+
 LIR* MipsMir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
   LOG(FATAL) << "Unexpected use of OpMem for MIPS";
   return NULL;
 }
 
 LIR* MipsMir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                       int displacement, RegStorage r_src, RegStorage r_src_hi,
-                                       OpSize size, int s_reg) {
+                                       int displacement, RegStorage r_src, OpSize size) {
   LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for MIPS";
   return NULL;
 }
@@ -677,8 +692,7 @@
 }
 
 LIR* MipsMir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                      int displacement, RegStorage r_dest, RegStorage r_dest_hi,
-                                      OpSize size, int s_reg) {
+                                      int displacement, RegStorage r_dest, OpSize size) {
   LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for MIPS";
   return NULL;
 }
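For reference, a minimal caller-side sketch of the interface change above, not part of the patch: the dedicated *Wide entry points disappear, the operand width travels in the OpSize argument instead, and volatile accesses go through the new checked variants. The call site below is hypothetical; only the function names and size constants come from this change.

    // Before: width implied by the entry point, plus a now-removed s_reg argument.
    //   LoadBaseDispWide(TargetReg(kSp), offset, rl_dest.reg, INVALID_SREG);
    //   StoreBaseDispWide(reg_obj, field_offset, reg_src);
    // After: a single entry point; 64-bit values are split into halves internally.
    LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64);
    StoreBaseDisp(reg_obj, field_offset, reg_src, k64);
    // Volatile accesses use the checked variants, which on this MIPS backend DCHECK
    // that the size is not 64-bit (SupportsVolatileLoadStore() filters that earlier).
    LoadBaseDispVolatile(reg_obj, field_offset, r_result, k32);
    StoreBaseDispVolatile(reg_obj, field_offset, reg_src, k32);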
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index b2362fc..2f37520 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -25,20 +25,18 @@
 
 /* Mark a temp register as dead.  Does not affect allocation state. */
 inline void Mir2Lir::ClobberBody(RegisterInfo* p) {
-  if (p->is_temp) {
-    DCHECK(!(p->live && p->dirty))  << "Live & dirty temp in clobber";
-    p->live = false;
-    p->s_reg = INVALID_SREG;
-    p->def_start = NULL;
-    p->def_end = NULL;
-    if (p->pair) {
-      p->pair = false;
-      p = GetRegInfo(p->partner);
-      p->pair = false;
-      p->live = false;
-      p->s_reg = INVALID_SREG;
-      p->def_start = NULL;
-      p->def_end = NULL;
+  DCHECK(p->IsTemp());
+  if (p->SReg() != INVALID_SREG) {
+    DCHECK(!(p->IsLive() && p->IsDirty()))  << "Live & dirty temp in clobber";
+    p->MarkDead();
+    if (p->IsWide()) {
+      p->SetIsWide(false);
+      if (p->GetReg() != p->Partner()) {
+        // Register pair - deal with the other half.
+        p = GetRegInfo(p->Partner());
+        p->SetIsWide(false);
+        p->MarkDead();
+      }
     }
   }
 }
@@ -143,13 +141,15 @@
  * Mark the corresponding bit(s).
  */
 inline void Mir2Lir::SetupRegMask(uint64_t* mask, int reg) {
-  *mask |= GetRegMaskCommon(reg);
+  DCHECK_EQ((reg & ~RegStorage::kRegValMask), 0);
+  DCHECK(reginfo_map_.Get(reg) != nullptr) << "No info for 0x" << reg;
+  *mask |= reginfo_map_.Get(reg)->DefUseMask();
 }
 
 /*
  * Set up the proper fields in the resource mask
  */
-inline void Mir2Lir::SetupResourceMasks(LIR* lir) {
+inline void Mir2Lir::SetupResourceMasks(LIR* lir, bool leave_mem_ref) {
   int opcode = lir->opcode;
 
   if (IsPseudoLirOp(opcode)) {
@@ -170,7 +170,7 @@
   lir->flags.size = GetInsnSize(lir);
   estimated_native_code_size_ += lir->flags.size;
   /* Set up the mask for resources that are updated */
-  if (flags & (IS_LOAD | IS_STORE)) {
+  if (!leave_mem_ref && (flags & (IS_LOAD | IS_STORE))) {
     /* Default to heap - will catch specialized classes later */
     SetMemRefType(lir, flags & IS_LOAD, kHeapRef);
   }
@@ -228,9 +228,11 @@
   SetupTargetResourceMasks(lir, flags);
 }
 
-inline art::Mir2Lir::RegisterInfo* Mir2Lir::GetRegInfo(int reg) {
-  DCHECK(reginfo_map_.Get(reg) != NULL);
-  return reginfo_map_.Get(reg);
+inline art::Mir2Lir::RegisterInfo* Mir2Lir::GetRegInfo(RegStorage reg) {
+  RegisterInfo* res = reg.IsPair() ? reginfo_map_.Get(reg.GetLowReg()) :
+      reginfo_map_.Get(reg.GetReg());
+  DCHECK(res != nullptr);
+  return res;
 }
 
 }  // namespace art
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 6c5279e..9fc93d0 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -37,7 +37,7 @@
 }
 
 // TODO: needs revisit for 64-bit.
-RegStorage Mir2Lir::LoadArg(int in_position, bool wide) {
+RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) {
   RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
   RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
       RegStorage::InvalidReg();
@@ -56,28 +56,45 @@
   if (wide && !reg_arg_high.Valid()) {
     // If the low part is not in a reg, we allocate a pair. Otherwise, we just load to high reg.
     if (!reg_arg_low.Valid()) {
-      RegStorage new_regs = AllocTypedTempWide(false, kAnyReg);
-      reg_arg_low = new_regs.GetLow();
-      reg_arg_high = new_regs.GetHigh();
-      LoadBaseDispWide(TargetReg(kSp), offset, new_regs, INVALID_SREG);
+      RegStorage new_regs = AllocTypedTempWide(false, reg_class);
+      LoadBaseDisp(TargetReg(kSp), offset, new_regs, k64);
+      return new_regs;  // The reg_class is OK, we can return.
     } else {
+      // Assume that no ABI allows splitting a wide fp reg between a narrow fp reg and memory,
+      // i.e. the low part is in a core reg. Load the second part in a core reg as well for now.
+      DCHECK(!reg_arg_low.IsFloat());
       reg_arg_high = AllocTemp();
       int offset_high = offset + sizeof(uint32_t);
       Load32Disp(TargetReg(kSp), offset_high, reg_arg_high);
+      // Continue below to check the reg_class.
     }
   }
 
   // If the low part is not in a register yet, we need to load it.
   if (!reg_arg_low.Valid()) {
-    reg_arg_low = AllocTemp();
+    // Assume that if the low part of a wide arg is passed in memory, so is the high part,
+    // thus we don't get here for wide args as it's handled above. Big-endian ABIs could
+    // conceivably break this assumption but Android supports only little-endian architectures.
+    DCHECK(!wide);
+    reg_arg_low = AllocTypedTemp(false, reg_class);
     Load32Disp(TargetReg(kSp), offset, reg_arg_low);
+    return reg_arg_low;  // The reg_class is OK, we can return.
   }
 
-  if (wide) {
-    return RegStorage::MakeRegPair(reg_arg_low, reg_arg_high);
-  } else {
-    return reg_arg_low;
+  RegStorage reg_arg = wide ? RegStorage::MakeRegPair(reg_arg_low, reg_arg_high) : reg_arg_low;
+  // Check if we need to copy the arg to a different reg_class.
+  if (!RegClassMatches(reg_class, reg_arg)) {
+    if (wide) {
+      RegStorage new_regs = AllocTypedTempWide(false, reg_class);
+      OpRegCopyWide(new_regs, reg_arg);
+      reg_arg = new_regs;
+    } else {
+      RegStorage new_reg = AllocTypedTemp(false, reg_class);
+      OpRegCopy(new_reg, reg_arg);
+      reg_arg = new_reg;
+    }
   }
+  return reg_arg;
 }
 
 void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) {
@@ -112,7 +129,7 @@
       OpRegCopy(rl_dest.reg.GetHigh(), reg_arg_high);
       Load32Disp(TargetReg(kSp), offset, rl_dest.reg.GetLow());
     } else {
-      LoadBaseDispWide(TargetReg(kSp), offset, rl_dest.reg, INVALID_SREG);
+      LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64);
     }
   }
 }
@@ -126,24 +143,41 @@
   }
 
   bool wide = (data.op_variant == InlineMethodAnalyser::IGetVariant(Instruction::IGET_WIDE));
+  bool ref = (data.op_variant == InlineMethodAnalyser::IGetVariant(Instruction::IGET_OBJECT));
+  OpSize size = LoadStoreOpSize(wide, ref);
+  if (data.is_volatile && !SupportsVolatileLoadStore(size)) {
+    return false;
+  }
+
   // The inliner doesn't distinguish kDouble or kFloat, use shorty.
   bool double_or_float = cu_->shorty[0] == 'F' || cu_->shorty[0] == 'D';
 
   // Point of no return - no aborts after this
   GenPrintLabel(mir);
   LockArg(data.object_arg);
+  RegStorage reg_obj = LoadArg(data.object_arg, kCoreReg);
   RegLocation rl_dest = wide ? GetReturnWide(double_or_float) : GetReturn(double_or_float);
-  RegStorage reg_obj = LoadArg(data.object_arg);
-  if (wide) {
-    LoadBaseDispWide(reg_obj, data.field_offset, rl_dest.reg, INVALID_SREG);
-  } else {
-    Load32Disp(reg_obj, data.field_offset, rl_dest.reg);
+  RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile);
+  RegStorage r_result = rl_dest.reg;
+  if (!RegClassMatches(reg_class, r_result)) {
+    r_result = wide ? AllocTypedTempWide(rl_dest.fp, reg_class)
+                    : AllocTypedTemp(rl_dest.fp, reg_class);
   }
   if (data.is_volatile) {
+    LoadBaseDispVolatile(reg_obj, data.field_offset, r_result, size);
     // Without context sensitive analysis, we must issue the most conservative barriers.
     // In this case, either a load or store may follow so we issue both barriers.
     GenMemBarrier(kLoadLoad);
     GenMemBarrier(kLoadStore);
+  } else {
+    LoadBaseDisp(reg_obj, data.field_offset, r_result, size);
+  }
+  if (r_result != rl_dest.reg) {
+    if (wide) {
+      OpRegCopyWide(rl_dest.reg, r_result);
+    } else {
+      OpRegCopy(rl_dest.reg, r_result);
+    }
   }
   return true;
 }
@@ -161,27 +195,29 @@
   }
 
   bool wide = (data.op_variant == InlineMethodAnalyser::IPutVariant(Instruction::IPUT_WIDE));
+  bool ref = (data.op_variant == InlineMethodAnalyser::IPutVariant(Instruction::IPUT_OBJECT));
+  OpSize size = LoadStoreOpSize(wide, ref);
+  if (data.is_volatile && !SupportsVolatileLoadStore(size)) {
+    return false;
+  }
 
   // Point of no return - no aborts after this
   GenPrintLabel(mir);
   LockArg(data.object_arg);
   LockArg(data.src_arg, wide);
-  RegStorage reg_obj = LoadArg(data.object_arg);
-  RegStorage reg_src = LoadArg(data.src_arg, wide);
+  RegStorage reg_obj = LoadArg(data.object_arg, kCoreReg);
+  RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile);
+  RegStorage reg_src = LoadArg(data.src_arg, reg_class, wide);
   if (data.is_volatile) {
     // There might have been a store before this volatile one so insert StoreStore barrier.
     GenMemBarrier(kStoreStore);
-  }
-  if (wide) {
-    StoreBaseDispWide(reg_obj, data.field_offset, reg_src);
-  } else {
-    Store32Disp(reg_obj, data.field_offset, reg_src);
-  }
-  if (data.is_volatile) {
+    StoreBaseDispVolatile(reg_obj, data.field_offset, reg_src, size);
     // A load might follow the volatile store so insert a StoreLoad barrier.
     GenMemBarrier(kStoreLoad);
+  } else {
+    StoreBaseDisp(reg_obj, data.field_offset, reg_src, size);
   }
-  if (data.op_variant == InlineMethodAnalyser::IPutVariant(Instruction::IPUT_OBJECT)) {
+  if (ref) {
     MarkGCCard(reg_src, reg_obj);
   }
   return true;
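Taken together, the two special-case generators above now emit the following order around a volatile instance field access; this is a summary sketch using the helpers named in the patch, with placeholder registers and size.

    // Volatile IGET: perform the load, then conservatively order it before any
    // subsequent load or store (no context-sensitive analysis is available).
    LoadBaseDispVolatile(reg_obj, data.field_offset, r_result, size);
    GenMemBarrier(kLoadLoad);
    GenMemBarrier(kLoadStore);

    // Volatile IPUT: order earlier stores before the store, and the store before
    // any later load.
    GenMemBarrier(kStoreStore);
    StoreBaseDispVolatile(reg_obj, data.field_offset, reg_src, size);
    GenMemBarrier(kStoreLoad);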
@@ -282,11 +318,13 @@
   int opt_flags = mir->optimization_flags;
   uint32_t vB = mir->dalvikInsn.vB;
   uint32_t vC = mir->dalvikInsn.vC;
+  DCHECK(CheckCorePoolSanity()) << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " @ 0x:"
+                                << std::hex << current_dalvik_offset_;
 
   // Prep Src and Dest locations.
   int next_sreg = 0;
   int next_loc = 0;
-  uint64_t attrs = mir_graph_->oat_data_flow_attributes_[opcode];
+  uint64_t attrs = MIRGraph::GetDataFlowAttributes(opcode);
   rl_src[0] = rl_src[1] = rl_src[2] = mir_graph_->GetBadLoc();
   if (attrs & DF_UA) {
     if (attrs & DF_A_WIDE) {
@@ -387,21 +425,11 @@
     case Instruction::CONST:
     case Instruction::CONST_4:
     case Instruction::CONST_16:
-      rl_result = EvalLoc(rl_dest, kAnyReg, true);
-      LoadConstantNoClobber(rl_result.reg, vB);
-      StoreValue(rl_dest, rl_result);
-      if (vB == 0) {
-        Workaround7250540(rl_dest, rl_result.reg);
-      }
+      GenConst(rl_dest, vB);
       break;
 
     case Instruction::CONST_HIGH16:
-      rl_result = EvalLoc(rl_dest, kAnyReg, true);
-      LoadConstantNoClobber(rl_result.reg, vB << 16);
-      StoreValue(rl_dest, rl_result);
-      if (vB == 0) {
-        Workaround7250540(rl_dest, rl_result.reg);
-      }
+      GenConst(rl_dest, vB << 16);
       break;
 
     case Instruction::CONST_WIDE_16:
@@ -910,6 +938,7 @@
     default:
       LOG(FATAL) << "Unexpected opcode: " << opcode;
   }
+  DCHECK(CheckCorePoolSanity());
 }  // NOLINT(readability/fn_size)
 
 // Process extended MIR instructions
@@ -939,7 +968,18 @@
     case kMirOpSelect:
       GenSelect(bb, mir);
       break;
+    case kMirOpPhi:
+    case kMirOpNop:
+    case kMirOpNullCheck:
+    case kMirOpRangeCheck:
+    case kMirOpDivZeroCheck:
+    case kMirOpCheck:
+    case kMirOpCheckPart2:
+      // Ignore these known opcodes
+      break;
     default:
+      // Give the backends a chance to handle unknown extended MIR opcodes.
+      GenMachineSpecificExtendedMethodMIR(bb, mir);
       break;
   }
 }
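With the default case above delegating to the backend, a target that defines its own extended MIR opcodes can intercept them with an override. A minimal sketch follows; SomeTargetMir2Lir and kMirOpVendorFoo are invented names used only for illustration.

    void SomeTargetMir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
      if (static_cast<int>(mir->dalvikInsn.opcode) == kMirOpVendorFoo) {
        // Emit the target-specific LIR for the vendor opcode here.
      } else {
        // Anything else is still unexpected; let the base class report it.
        Mir2Lir::GenMachineSpecificExtendedMethodMIR(bb, mir);
      }
    }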
@@ -974,7 +1014,7 @@
   }
 
   // Free temp registers and reset redundant store tracking.
-  ClobberAllRegs();
+  ClobberAllTemps();
 
   if (bb->block_type == kEntryBlock) {
     ResetRegPool();
@@ -989,10 +1029,9 @@
   for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
     ResetRegPool();
     if (cu_->disable_opt & (1 << kTrackLiveTemps)) {
-      ClobberAllRegs();
+      ClobberAllTemps();
       // Reset temp allocation to minimize differences when A/B testing.
-      reg_pool_->next_core_reg = 0;
-      reg_pool_->next_fp_reg = 0;
+      reg_pool_->ResetNextTemp();
     }
 
     if (cu_->disable_opt & (1 << kSuppressLoads)) {
@@ -1070,7 +1109,7 @@
   // Free temp registers and reset redundant store tracking.
   ResetRegPool();
   ResetDefTracking();
-  ClobberAllRegs();
+  ClobberAllTemps();
 
   return GenSpecialCase(bb, mir, special);
 }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 9283a29..4cebb7c 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -91,6 +91,7 @@
 
 // Common combo register usage patterns.
 #define REG_DEF01            (REG_DEF0 | REG_DEF1)
+#define REG_DEF012           (REG_DEF0 | REG_DEF1 | REG_DEF2)
 #define REG_DEF01_USE2       (REG_DEF0 | REG_DEF1 | REG_USE2)
 #define REG_DEF0_USE01       (REG_DEF0 | REG_USE01)
 #define REG_DEF0_USE0        (REG_DEF0 | REG_USE0)
@@ -167,10 +168,14 @@
 // Target-specific initialization.
 Mir2Lir* ArmCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                           ArenaAllocator* const arena);
+Mir2Lir* Arm64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
+                            ArenaAllocator* const arena);
 Mir2Lir* MipsCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                           ArenaAllocator* const arena);
 Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                           ArenaAllocator* const arena);
+Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
+                          ArenaAllocator* const arena);
 
 // Utility macros to traverse the LIR list.
 #define NEXT_LIR(lir) (lir->next)
@@ -241,32 +246,203 @@
     };
 
     /*
-     * Data structure tracking the mapping between a Dalvik register (pair) and a
-     * native register (pair). The idea is to reuse the previously loaded value
-     * if possible, otherwise to keep the value in a native register as long as
-     * possible.
+     * Data structure tracking the mapping between a Dalvik value (32 or 64 bits)
+     * and native register storage.  The primary purpose is to reuse previously
+     * loaded values, if possible, and otherwise to keep the value in register
+     * storage as long as possible.
+     *
+     * NOTE 1: wide_value refers to the width of the Dalvik value contained in
+     * this register (or pair).  For example, a 64-bit register containing a 32-bit
+     * Dalvik value would have wide_value==false even though the storage container itself
+     * is wide.  Similarly, a 32-bit register containing half of a 64-bit Dalvik value
+     * would have wide_value==true (and additionally would have its partner field set to the
+     * other half, whose wide_value field would also be true).
+     *
+     * NOTE 2: In the case of a register pair, you can determine which of the partners
+     * is the low half by looking at the s_reg names.  The high s_reg will equal low_sreg + 1.
+     *
+     * NOTE 3: In the case of a 64-bit register holding a Dalvik wide value, wide_value
+     * will be true and partner==self.  s_reg refers to the low-order word of the Dalvik
+     * value, and the s_reg of the high word is implied (s_reg + 1).
+     *
+     * NOTE 4: The reg and is_temp fields should always be correct.  If is_temp is false no
+     * other fields have meaning. [perhaps not true, wide should work for promoted regs?]
+     * If is_temp==true and live==false, no other fields have
+     * meaning.  If is_temp==true and live==true, wide_value, partner, dirty, s_reg, def_start
+     * and def_end describe the relationship between the temp register/register pair and
+     * the Dalvik value[s] described by s_reg/s_reg+1.
+     *
+     * The fields used_storage, master_storage and storage_mask are used to track allocation
+     * in light of potential aliasing.  For example, consider Arm's d2, which overlaps s4 & s5.
+     * d2's storage mask would be 0x00000003, the two low-order bits denoting 64 bits of
+     * storage use.  For s4, it would be 0x00000001; for s5, 0x00000002.  These values should not
+     * change once initialized.  The "used_storage" field tracks current allocation status.
+     * Although each record contains this field, only the field from the largest member of
+     * an aliased group is used.  In our case, it would be d2's.  The master_storage pointer
+     * of d2, s4 and s5 would all point to d2's used_storage field.  Each bit in a used_storage
+     * represents 32 bits of storage.  d2's used_storage would be initialized to 0xfffffffc.
+     * Then, if we wanted to determine whether s4 could be allocated, we would "and"
+     * s4's storage_mask with s4's *master_storage.  If the result is zero, s4 is free;
+     * to allocate: *master_storage |= storage_mask; to free: *master_storage &= ~storage_mask.
+     *
+     * For an X86 vector register example, storage_mask would be:
+     *    0x00000001 for 32-bit view of xmm1
+     *    0x00000003 for 64-bit view of xmm1
+     *    0x0000000f for 128-bit view of xmm1
+     *    0x000000ff for 256-bit view of ymm1   // future expansion, if needed
+     *    0x0000ffff for 512-bit view of ymm1   // future expansion, if needed
+     *    0xffffffff for 1024-bit view of ymm1  // future expansion, if needed
+     *
+     * The "liveness" of a register is handled in a similar way.  The liveness_ storage is
+     * held in the widest member of an aliased set.  Note, though, that for a temp register to be
+     * reused as live, it must both be marked live and have an associated SReg() matching the
+     * desired s_reg.  This gets a little complicated when dealing with aliased registers.  All
+     * members of an aliased set will share the same liveness flags, but each will individually
+     * maintain s_reg_.  In this way we can know that at least one member of an
+     * aliased set is live, but will only fully match on the appropriate alias view.  For example,
+     * if Arm d1 is live as a double and has s_reg_ set to Dalvik v8 (which also implies v9
+     * because it is wide), its aliases s2 and s3 will show as live, but will have
+     * s_reg_ == INVALID_SREG.  An attempt to later AllocLiveReg() of v9 with a single-precision
+     * view will fail because although s3's liveness bit is set, its s_reg_ will not match v9.
+     * This will cause all members of the aliased set to be clobbered and AllocLiveReg() will
+     * report that v9 is currently not live as a single (which is what we want).
+     *
+     * NOTE: the x86 usage is still somewhat in flux.  There are competing notions of how
+     * to treat xmm registers:
+     *     1. Treat them all as 128 bits wide, but denote how much data is used via the bytes field.
+     *         o This more closely matches reality, but means you'd need to be able to get
+     *           to the associated RegisterInfo struct to figure out how it's being used.
+     *         o This is how 64-bit core registers will be used - always 64 bits, but the
+     *           "bytes" field will be 4 for 32-bit usage and 8 for 64-bit usage.
+     *     2. View the xmm registers based on contents.
+     *         o A single in a xmm2 register would be k32BitVector, while a double in xmm2 would
+     *           be a k64BitVector.
+     *         o Note that the two uses above would be considered distinct registers (but with
+     *           the aliasing mechanism, we could detect interference).
+     *         o This is how aliased double and single float registers will be handled on
+     *           Arm and MIPS.
+     * Working plan is, for all targets, to follow mechanism 1 for 64-bit core registers, and
+     * mechanism 2 for aliased float registers and x86 vector registers.
      */
-    struct RegisterInfo {
-      int reg;                    // Reg number
-      bool in_use;                // Has it been allocated?
-      bool is_temp;               // Can allocate as temp?
-      bool pair;                  // Part of a register pair?
-      int partner;                // If pair, other reg of pair.
-      bool live;                  // Is there an associated SSA name?
-      bool dirty;                 // If live, is it dirty?
-      int s_reg;                  // Name of live value.
-      LIR *def_start;             // Starting inst in last def sequence.
-      LIR *def_end;               // Ending inst in last def sequence.
+    class RegisterInfo {
+     public:
+      RegisterInfo(RegStorage r, uint64_t mask = ENCODE_ALL);
+      ~RegisterInfo() {}
+      static void* operator new(size_t size, ArenaAllocator* arena) {
+        return arena->Alloc(size, kArenaAllocRegAlloc);
+      }
+
+      bool InUse() { return (storage_mask_ & master_->used_storage_) != 0; }
+      void MarkInUse() { master_->used_storage_ |= storage_mask_; }
+      void MarkFree() { master_->used_storage_ &= ~storage_mask_; }
+      // No part of the containing storage is live in this view.
+      bool IsDead() { return (master_->liveness_ & storage_mask_) == 0; }
+      // Liveness of this view matches.  Note: not equivalent to !IsDead().
+      bool IsLive() { return (master_->liveness_ & storage_mask_) == storage_mask_; }
+      void MarkLive(int s_reg) {
+        // TODO: Anything useful to assert here?
+        s_reg_ = s_reg;
+        master_->liveness_ |= storage_mask_;
+      }
+      void MarkDead() {
+        if (SReg() != INVALID_SREG) {
+          s_reg_ = INVALID_SREG;
+          master_->liveness_ &= ~storage_mask_;
+          ResetDefBody();
+        }
+      }
+      RegStorage GetReg() { return reg_; }
+      void SetReg(RegStorage reg) { reg_ = reg; }
+      bool IsTemp() { return is_temp_; }
+      void SetIsTemp(bool val) { is_temp_ = val; }
+      bool IsWide() { return wide_value_; }
+      void SetIsWide(bool val) {
+        wide_value_ = val;
+        if (!val) {
+          // If not wide, reset partner to self.
+          SetPartner(GetReg());
+        }
+      }
+      bool IsDirty() { return dirty_; }
+      void SetIsDirty(bool val) { dirty_ = val; }
+      RegStorage Partner() { return partner_; }
+      void SetPartner(RegStorage partner) { partner_ = partner; }
+      int SReg() { return (!IsTemp() || IsLive()) ? s_reg_ : INVALID_SREG; }
+      uint64_t DefUseMask() { return def_use_mask_; }
+      void SetDefUseMask(uint64_t def_use_mask) { def_use_mask_ = def_use_mask; }
+      RegisterInfo* Master() { return master_; }
+      void SetMaster(RegisterInfo* master) {
+        master_ = master;
+        if (master != this) {
+          master_->aliased_ = true;
+          DCHECK(alias_chain_ == nullptr);
+          alias_chain_ = master_->alias_chain_;
+          master_->alias_chain_ = this;
+        }
+      }
+      bool IsAliased() { return aliased_; }
+      RegisterInfo* GetAliasChain() { return alias_chain_; }
+      uint32_t StorageMask() { return storage_mask_; }
+      void SetStorageMask(uint32_t storage_mask) { storage_mask_ = storage_mask; }
+      LIR* DefStart() { return def_start_; }
+      void SetDefStart(LIR* def_start) { def_start_ = def_start; }
+      LIR* DefEnd() { return def_end_; }
+      void SetDefEnd(LIR* def_end) { def_end_ = def_end; }
+      void ResetDefBody() { def_start_ = def_end_ = nullptr; }
+
+
+     private:
+      RegStorage reg_;
+      bool is_temp_;               // Can allocate as temp?
+      bool wide_value_;            // Holds a Dalvik wide value (either itself, or part of a pair).
+      bool dirty_;                 // If live, is it dirty?
+      bool aliased_;               // Is this the master for other aliased RegisterInfo's?
+      RegStorage partner_;         // If wide_value, other reg of pair or self if 64-bit register.
+      int s_reg_;                  // Name of live value.
+      uint64_t def_use_mask_;      // Resources for this element.
+      uint32_t used_storage_;      // 1 bit per 4 bytes of storage. Unused by aliases.
+      uint32_t liveness_;          // 1 bit per 4 bytes of storage. Unused by aliases.
+      RegisterInfo* master_;       // Pointer to controlling storage mask.
+      uint32_t storage_mask_;      // Track allocation of sub-units.
+      LIR *def_start_;             // Starting inst in last def sequence.
+      LIR *def_end_;               // Ending inst in last def sequence.
+      RegisterInfo* alias_chain_;  // Chain of aliased registers.
     };
 
-    struct RegisterPool {
-       int num_core_regs;
-       RegisterInfo *core_regs;
-       int next_core_reg;
-       int num_fp_regs;
-       RegisterInfo *FPRegs;
-       int next_fp_reg;
-     };
+    class RegisterPool {
+     public:
+      RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena,
+                   const std::vector<RegStorage>& core_regs,
+                   const std::vector<RegStorage>& core64_regs,
+                   const std::vector<RegStorage>& sp_regs,
+                   const std::vector<RegStorage>& dp_regs,
+                   const std::vector<RegStorage>& reserved_regs,
+                   const std::vector<RegStorage>& reserved64_regs,
+                   const std::vector<RegStorage>& core_temps,
+                   const std::vector<RegStorage>& core64_temps,
+                   const std::vector<RegStorage>& sp_temps,
+                   const std::vector<RegStorage>& dp_temps);
+      ~RegisterPool() {}
+      static void* operator new(size_t size, ArenaAllocator* arena) {
+        return arena->Alloc(size, kArenaAllocRegAlloc);
+      }
+      void ResetNextTemp() {
+        next_core_reg_ = 0;
+        next_sp_reg_ = 0;
+        next_dp_reg_ = 0;
+      }
+      GrowableArray<RegisterInfo*> core_regs_;
+      int next_core_reg_;
+      GrowableArray<RegisterInfo*> core64_regs_;
+      int next_core64_reg_;
+      GrowableArray<RegisterInfo*> sp_regs_;    // Single precision float.
+      int next_sp_reg_;
+      GrowableArray<RegisterInfo*> dp_regs_;    // Double precision float.
+      int next_dp_reg_;
+
+     private:
+      Mir2Lir* const m2l_;
+    };
 
     struct PromotionMap {
       RegLocationType core_location:3;
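The allocation arithmetic described in the RegisterInfo comment above (d2 aliasing s4/s5, one used_storage bit per 32 bits of storage) can be exercised with a small standalone model. This is a simplified illustration rather than ART code; the mask values mirror the comment's example.

    #include <cassert>
    #include <cstdint>

    int main() {
      // The master storage word lives with the widest member of the aliased set (d2).
      // Bits outside d2's two 32-bit slots are pre-set so they are never handed out.
      uint32_t d2_used_storage = 0xfffffffc;
      const uint32_t d2_mask = 0x00000003;  // 64 bits of storage -> two low-order bits.
      const uint32_t s4_mask = 0x00000001;
      const uint32_t s5_mask = 0x00000002;

      assert((d2_used_storage & s4_mask) == 0);  // s4 starts out free.
      d2_used_storage |= s4_mask;                // Allocate s4.
      assert((d2_used_storage & d2_mask) != 0);  // d2 now conflicts: it overlaps s4.
      assert((d2_used_storage & s5_mask) == 0);  // s5 is still free.
      d2_used_storage &= ~s4_mask;               // Free s4.
      assert((d2_used_storage & d2_mask) == 0);  // d2 may be allocated again.
      return 0;
    }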
@@ -315,7 +491,8 @@
      public:
       LIRSlowPath(Mir2Lir* m2l, const DexOffset dexpc, LIR* fromfast,
                   LIR* cont = nullptr) :
-        m2l_(m2l), current_dex_pc_(dexpc), fromfast_(fromfast), cont_(cont) {
+        m2l_(m2l), cu_(m2l->cu_), current_dex_pc_(dexpc), fromfast_(fromfast), cont_(cont) {
+          m2l->StartSlowPath(cont);
       }
       virtual ~LIRSlowPath() {}
       virtual void Compile() = 0;
@@ -324,10 +501,19 @@
         return arena->Alloc(size, kArenaAllocData);
       }
 
+      LIR *GetContinuationLabel() {
+        return cont_;
+      }
+
+      LIR *GetFromFast() {
+        return fromfast_;
+      }
+
      protected:
       LIR* GenerateTargetLabel(int opcode = kPseudoTargetLabel);
 
       Mir2Lir* const m2l_;
+      CompilationUnit* const cu_;
       const DexOffset current_dex_pc_;
       LIR* const fromfast_;
       LIR* const cont_;
@@ -339,7 +525,14 @@
       return *reinterpret_cast<const int32_t*>(switch_data);
     }
 
-    RegisterClass oat_reg_class_by_size(OpSize size) {
+    /*
+     * TODO: this is a trace JIT vestige, and its use should be reconsidered.  At the time
+     * it was introduced, it was intended to be a quick best guess of type without having to
+     * take the time to do type analysis.  Currently, though, we have a much better idea of
+     * the types of Dalvik virtual registers.  Instead of using this for a best guess, why not
+     * just use our knowledge of type to select the most appropriate register class?
+     */
+    RegisterClass RegClassBySize(OpSize size) {
       return (size == kUnsignedHalf || size == kSignedHalf || size == kUnsignedByte ||
               size == kSignedByte) ? kCoreReg : kAnyReg;
     }
@@ -406,7 +599,7 @@
     virtual void Materialize();
     virtual CompiledMethod* GetCompiledMethod();
     void MarkSafepointPC(LIR* inst);
-    void SetupResourceMasks(LIR* lir);
+    void SetupResourceMasks(LIR* lir, bool leave_mem_ref = false);
     void SetMemRefType(LIR* lir, bool is_load, int mem_type);
     void AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, bool is64bit);
     void SetupRegMask(uint64_t* mask, int reg);
@@ -424,6 +617,7 @@
     LIR* NewLIR5(int opcode, int dest, int src1, int src2, int info1, int info2);
     LIR* ScanLiteralPool(LIR* data_target, int value, unsigned int delta);
     LIR* ScanLiteralPoolWide(LIR* data_target, int val_lo, int val_hi);
+    LIR* ScanLiteralPoolMethod(LIR* data_target, const MethodReference& method);
     LIR* AddWordData(LIR* *constant_list_p, int value);
     LIR* AddWideData(LIR* *constant_list_p, int val_lo, int val_hi);
     void ProcessSwitchTables();
@@ -448,6 +642,12 @@
     LIR* InsertCaseLabel(DexOffset vaddr, int keyVal);
     void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec);
     void MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec);
+
+    virtual void StartSlowPath(LIR *label) {}
+    virtual void BeginInvoke(CallInfo* info) {}
+    virtual void EndInvoke(CallInfo* info) {}
+
+
     // Handle bookkeeping to convert a wide RegLocation to a narrow RegLocation.  No code generated.
     RegLocation NarrowRegLoc(RegLocation loc);
 
@@ -455,85 +655,77 @@
     void ConvertMemOpIntoMove(LIR* orig_lir, RegStorage dest, RegStorage src);
     void ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir);
     void ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir);
-    void ApplyLocalOptimizations(LIR* head_lir, LIR* tail_lir);
+    virtual void ApplyLocalOptimizations(LIR* head_lir, LIR* tail_lir);
 
     // Shared by all targets - implemented in ralloc_util.cc
     int GetSRegHi(int lowSreg);
-    bool oat_live_out(int s_reg);
-    int oatSSASrc(MIR* mir, int num);
+    bool LiveOut(int s_reg);
     void SimpleRegAlloc();
     void ResetRegPool();
-    void CompilerInitPool(RegisterInfo* regs, int* reg_nums, int num);
-    void DumpRegPool(RegisterInfo* p, int num_regs);
+    void CompilerInitPool(RegisterInfo* info, RegStorage* regs, int num);
+    void DumpRegPool(GrowableArray<RegisterInfo*>* regs);
     void DumpCoreRegPool();
     void DumpFpRegPool();
+    void DumpRegPools();
     /* Mark a temp register as dead.  Does not affect allocation state. */
-    void Clobber(int reg) {
-      ClobberBody(GetRegInfo(reg));
-    }
     void Clobber(RegStorage reg);
-    void ClobberSRegBody(RegisterInfo* p, int num_regs, int s_reg);
     void ClobberSReg(int s_reg);
+    void ClobberAliases(RegisterInfo* info);
     int SRegToPMap(int s_reg);
     void RecordCorePromotion(RegStorage reg, int s_reg);
     RegStorage AllocPreservedCoreReg(int s_reg);
-    void RecordFpPromotion(RegStorage reg, int s_reg);
+    void RecordSinglePromotion(RegStorage reg, int s_reg);
+    void RecordDoublePromotion(RegStorage reg, int s_reg);
     RegStorage AllocPreservedSingle(int s_reg);
-    RegStorage AllocPreservedDouble(int s_reg);
-    RegStorage AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp, bool required);
+    virtual RegStorage AllocPreservedDouble(int s_reg);
+    RegStorage AllocTempBody(GrowableArray<RegisterInfo*> &regs, int* next_temp, bool required);
+    virtual RegStorage AllocFreeTemp();
+    virtual RegStorage AllocTemp();
+    virtual RegStorage AllocTempWide();
+    virtual RegStorage AllocTempSingle();
     virtual RegStorage AllocTempDouble();
-    RegStorage AllocFreeTemp();
-    RegStorage AllocTemp();
-    RegStorage AllocTempFloat();
-    RegisterInfo* AllocLiveBody(RegisterInfo* p, int num_regs, int s_reg);
-    RegisterInfo* AllocLive(int s_reg, int reg_class);
-    void FreeTemp(int reg);
-    void FreeTemp(RegStorage reg);
-    RegisterInfo* IsLive(int reg);
-    bool IsLive(RegStorage reg);
-    RegisterInfo* IsTemp(int reg);
-    bool IsTemp(RegStorage reg);
-    RegisterInfo* IsPromoted(int reg);
+    virtual RegStorage AllocTypedTemp(bool fp_hint, int reg_class);
+    virtual RegStorage AllocTypedTempWide(bool fp_hint, int reg_class);
+    void FlushReg(RegStorage reg);
+    void FlushRegWide(RegStorage reg);
+    RegStorage AllocLiveReg(int s_reg, int reg_class, bool wide);
+    RegStorage FindLiveReg(GrowableArray<RegisterInfo*> &regs, int s_reg);
+    virtual void FreeTemp(RegStorage reg);
+    virtual void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free);
+    virtual bool IsLive(RegStorage reg);
+    virtual bool IsTemp(RegStorage reg);
     bool IsPromoted(RegStorage reg);
-    bool IsDirty(int reg);
     bool IsDirty(RegStorage reg);
-    void LockTemp(int reg);
     void LockTemp(RegStorage reg);
-    void ResetDef(int reg);
     void ResetDef(RegStorage reg);
-    void NullifyRange(LIR *start, LIR *finish, int s_reg1, int s_reg2);
+    void NullifyRange(RegStorage reg, int s_reg);
     void MarkDef(RegLocation rl, LIR *start, LIR *finish);
     void MarkDefWide(RegLocation rl, LIR *start, LIR *finish);
-    RegLocation WideToNarrow(RegLocation rl);
+    virtual RegLocation WideToNarrow(RegLocation rl);
     void ResetDefLoc(RegLocation rl);
-    virtual void ResetDefLocWide(RegLocation rl);
+    void ResetDefLocWide(RegLocation rl);
     void ResetDefTracking();
-    void ClobberAllRegs();
+    void ClobberAllTemps();
     void FlushSpecificReg(RegisterInfo* info);
-    void FlushAllRegsBody(RegisterInfo* info, int num_regs);
     void FlushAllRegs();
     bool RegClassMatches(int reg_class, RegStorage reg);
-    void MarkLive(RegStorage reg, int s_reg);
-    void MarkTemp(int reg);
+    void MarkLive(RegLocation loc);
     void MarkTemp(RegStorage reg);
-    void UnmarkTemp(int reg);
     void UnmarkTemp(RegStorage reg);
-    void MarkPair(int low_reg, int high_reg);
+    void MarkWide(RegStorage reg);
+    void MarkNarrow(RegStorage reg);
     void MarkClean(RegLocation loc);
     void MarkDirty(RegLocation loc);
-    void MarkInUse(int reg);
     void MarkInUse(RegStorage reg);
-    void CopyRegInfo(int new_reg, int old_reg);
-    void CopyRegInfo(RegStorage new_reg, RegStorage old_reg);
     bool CheckCorePoolSanity();
-    RegLocation UpdateLoc(RegLocation loc);
+    virtual RegLocation UpdateLoc(RegLocation loc);
     virtual RegLocation UpdateLocWide(RegLocation loc);
     RegLocation UpdateRawLoc(RegLocation loc);
 
     /**
-     * @brief Used to load register location into a typed temporary or pair of temporaries.
+     * @brief Used to prepare a register location to receive a wide value.
      * @see EvalLoc
-     * @param loc The register location to load from.
+     * @param loc the location where the value will be stored.
      * @param reg_class Type of register needed.
      * @param update Whether the liveness information should be updated.
      * @return Returns the properly typed temporary in physical register pairs.
@@ -541,8 +733,8 @@
     virtual RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update);
 
     /**
-     * @brief Used to load register location into a typed temporary.
-     * @param loc The register location to load from.
+     * @brief Used to prepare a register location to receive a value.
+     * @param loc the location where the value will be stored.
      * @param reg_class Type of register needed.
      * @param update Whether the liveness information should be updated.
      * @return Returns the properly typed temporary in physical register.
@@ -556,14 +748,14 @@
     int SRegOffset(int s_reg);
     RegLocation GetReturnWide(bool is_double);
     RegLocation GetReturn(bool is_float);
-    RegisterInfo* GetRegInfo(int reg);
+    RegisterInfo* GetRegInfo(RegStorage reg);
 
     // Shared by all targets - implemented in gen_common.cc.
     void AddIntrinsicSlowPath(CallInfo* info, LIR* branch, LIR* resume = nullptr);
     bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
                           RegLocation rl_src, RegLocation rl_dest, int lit);
     bool HandleEasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit);
-    void HandleSlowPaths();
+    virtual void HandleSlowPaths();
     void GenBarrier();
     void GenDivZeroException();
     // c_code holds condition code that's generated from testing divisor against 0.
@@ -583,7 +775,7 @@
                              RegLocation rl_src2, LIR* taken, LIR* fall_through);
     void GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src,
                                  LIR* taken, LIR* fall_through);
-    void GenIntToLong(RegLocation rl_dest, RegLocation rl_src);
+    virtual void GenIntToLong(RegLocation rl_dest, RegLocation rl_src);
     void GenIntNarrowing(Instruction::Code opcode, RegLocation rl_dest,
                          RegLocation rl_src);
     void GenNewArray(uint32_t type_idx, RegLocation rl_dest,
@@ -608,16 +800,17 @@
     void GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_src);
     void GenLong3Addr(OpKind first_op, OpKind second_op, RegLocation rl_dest,
                       RegLocation rl_src1, RegLocation rl_src2);
-    void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
+    virtual void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
                         RegLocation rl_src1, RegLocation rl_shift);
     void GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest,
                           RegLocation rl_src, int lit);
     void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
                         RegLocation rl_src1, RegLocation rl_src2);
-    void GenConversionCall(ThreadOffset<4> func_offset, RegLocation rl_dest,
+    template <size_t pointer_size>
+    void GenConversionCall(ThreadOffset<pointer_size> func_offset, RegLocation rl_dest,
                            RegLocation rl_src);
-    void GenSuspendTest(int opt_flags);
-    void GenSuspendTestAndBranch(int opt_flags, LIR* target);
+    virtual void GenSuspendTest(int opt_flags);
+    virtual void GenSuspendTestAndBranch(int opt_flags, LIR* target);
 
     // This will be overridden by x86 implementation.
     virtual void GenConstWide(RegLocation rl_dest, int64_t value);
@@ -625,51 +818,72 @@
                        RegLocation rl_src1, RegLocation rl_src2);
 
     // Shared by all targets - implemented in gen_invoke.cc.
-    LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
+    template <size_t pointer_size>
+    LIR* CallHelper(RegStorage r_tgt, ThreadOffset<pointer_size> helper_offset, bool safepoint_pc,
                     bool use_link = true);
     RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
-    void CallRuntimeHelper(ThreadOffset<4> helper_offset, bool safepoint_pc);
-    void CallRuntimeHelperImm(ThreadOffset<4> helper_offset, int arg0, bool safepoint_pc);
-    void CallRuntimeHelperReg(ThreadOffset<4> helper_offset, RegStorage arg0, bool safepoint_pc);
-    void CallRuntimeHelperRegLocation(ThreadOffset<4> helper_offset, RegLocation arg0,
+    RegStorage CallHelperSetup(ThreadOffset<8> helper_offset);
+    template <size_t pointer_size>
+    void CallRuntimeHelper(ThreadOffset<pointer_size> helper_offset, bool safepoint_pc);
+    template <size_t pointer_size>
+    void CallRuntimeHelperImm(ThreadOffset<pointer_size> helper_offset, int arg0, bool safepoint_pc);
+    template <size_t pointer_size>
+    void CallRuntimeHelperReg(ThreadOffset<pointer_size> helper_offset, RegStorage arg0, bool safepoint_pc);
+    template <size_t pointer_size>
+    void CallRuntimeHelperRegLocation(ThreadOffset<pointer_size> helper_offset, RegLocation arg0,
                                       bool safepoint_pc);
-    void CallRuntimeHelperImmImm(ThreadOffset<4> helper_offset, int arg0, int arg1,
+    template <size_t pointer_size>
+    void CallRuntimeHelperImmImm(ThreadOffset<pointer_size> helper_offset, int arg0, int arg1,
                                  bool safepoint_pc);
-    void CallRuntimeHelperImmRegLocation(ThreadOffset<4> helper_offset, int arg0,
+    template <size_t pointer_size>
+    void CallRuntimeHelperImmRegLocation(ThreadOffset<pointer_size> helper_offset, int arg0,
                                          RegLocation arg1, bool safepoint_pc);
-    void CallRuntimeHelperRegLocationImm(ThreadOffset<4> helper_offset, RegLocation arg0,
+    template <size_t pointer_size>
+    void CallRuntimeHelperRegLocationImm(ThreadOffset<pointer_size> helper_offset, RegLocation arg0,
                                          int arg1, bool safepoint_pc);
-    void CallRuntimeHelperImmReg(ThreadOffset<4> helper_offset, int arg0, RegStorage arg1,
+    template <size_t pointer_size>
+    void CallRuntimeHelperImmReg(ThreadOffset<pointer_size> helper_offset, int arg0, RegStorage arg1,
                                  bool safepoint_pc);
-    void CallRuntimeHelperRegImm(ThreadOffset<4> helper_offset, RegStorage arg0, int arg1,
+    template <size_t pointer_size>
+    void CallRuntimeHelperRegImm(ThreadOffset<pointer_size> helper_offset, RegStorage arg0, int arg1,
                                  bool safepoint_pc);
-    void CallRuntimeHelperImmMethod(ThreadOffset<4> helper_offset, int arg0,
+    template <size_t pointer_size>
+    void CallRuntimeHelperImmMethod(ThreadOffset<pointer_size> helper_offset, int arg0,
                                     bool safepoint_pc);
-    void CallRuntimeHelperRegMethod(ThreadOffset<4> helper_offset, RegStorage arg0,
+    template <size_t pointer_size>
+    void CallRuntimeHelperRegMethod(ThreadOffset<pointer_size> helper_offset, RegStorage arg0,
                                     bool safepoint_pc);
-    void CallRuntimeHelperRegMethodRegLocation(ThreadOffset<4> helper_offset, RegStorage arg0,
-                                               RegLocation arg2, bool safepoint_pc);
-    void CallRuntimeHelperRegLocationRegLocation(ThreadOffset<4> helper_offset,
+    template <size_t pointer_size>
+    void CallRuntimeHelperRegMethodRegLocation(ThreadOffset<pointer_size> helper_offset,
+                                               RegStorage arg0, RegLocation arg2, bool safepoint_pc);
+    template <size_t pointer_size>
+    void CallRuntimeHelperRegLocationRegLocation(ThreadOffset<pointer_size> helper_offset,
                                                  RegLocation arg0, RegLocation arg1,
                                                  bool safepoint_pc);
-    void CallRuntimeHelperRegReg(ThreadOffset<4> helper_offset, RegStorage arg0, RegStorage arg1,
-                                 bool safepoint_pc);
-    void CallRuntimeHelperRegRegImm(ThreadOffset<4> helper_offset, RegStorage arg0, RegStorage arg1,
-                                    int arg2, bool safepoint_pc);
-    void CallRuntimeHelperImmMethodRegLocation(ThreadOffset<4> helper_offset, int arg0,
+    template <size_t pointer_size>
+    void CallRuntimeHelperRegReg(ThreadOffset<pointer_size> helper_offset, RegStorage arg0,
+                                 RegStorage arg1, bool safepoint_pc);
+    template <size_t pointer_size>
+    void CallRuntimeHelperRegRegImm(ThreadOffset<pointer_size> helper_offset, RegStorage arg0,
+                                    RegStorage arg1, int arg2, bool safepoint_pc);
+    template <size_t pointer_size>
+    void CallRuntimeHelperImmMethodRegLocation(ThreadOffset<pointer_size> helper_offset, int arg0,
                                                RegLocation arg2, bool safepoint_pc);
-    void CallRuntimeHelperImmMethodImm(ThreadOffset<4> helper_offset, int arg0, int arg2,
+    template <size_t pointer_size>
+    void CallRuntimeHelperImmMethodImm(ThreadOffset<pointer_size> helper_offset, int arg0, int arg2,
                                        bool safepoint_pc);
-    void CallRuntimeHelperImmRegLocationRegLocation(ThreadOffset<4> helper_offset,
+    template <size_t pointer_size>
+    void CallRuntimeHelperImmRegLocationRegLocation(ThreadOffset<pointer_size> helper_offset,
                                                     int arg0, RegLocation arg1, RegLocation arg2,
                                                     bool safepoint_pc);
-    void CallRuntimeHelperRegLocationRegLocationRegLocation(ThreadOffset<4> helper_offset,
+    template <size_t pointer_size>
+    void CallRuntimeHelperRegLocationRegLocationRegLocation(ThreadOffset<pointer_size> helper_offset,
                                                             RegLocation arg0, RegLocation arg1,
                                                             RegLocation arg2,
                                                             bool safepoint_pc);
     void GenInvoke(CallInfo* info);
     void GenInvokeNoInline(CallInfo* info);
-    void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
+    virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
     int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
                              NextCallInsn next_call_insn,
                              const MethodReference& target_method,
@@ -716,7 +930,7 @@
     bool GenInlinedUnsafeGet(CallInfo* info, bool is_long, bool is_volatile);
     bool GenInlinedUnsafePut(CallInfo* info, bool is_long, bool is_object,
                              bool is_volatile, bool is_ordered);
-    int LoadArgRegs(CallInfo* info, int call_state,
+    virtual int LoadArgRegs(CallInfo* info, int call_state,
                     NextCallInsn next_call_insn,
                     const MethodReference& target_method,
                     uint32_t vtable_idx,
@@ -726,41 +940,41 @@
     // Shared by all targets - implemented in gen_loadstore.cc.
     RegLocation LoadCurrMethod();
     void LoadCurrMethodDirect(RegStorage r_tgt);
-    LIR* LoadConstant(RegStorage r_dest, int value);
+    virtual LIR* LoadConstant(RegStorage r_dest, int value);
     // Natural word size.
-    LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
-      return LoadBaseDisp(r_base, displacement, r_dest, kWord, INVALID_SREG);
+    virtual LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
+      return LoadBaseDisp(r_base, displacement, r_dest, kWord);
     }
     // Load 32 bits, regardless of target.
-    LIR* Load32Disp(RegStorage r_base, int displacement, RegStorage r_dest)  {
-      return LoadBaseDisp(r_base, displacement, r_dest, k32, INVALID_SREG);
+    virtual LIR* Load32Disp(RegStorage r_base, int displacement, RegStorage r_dest)  {
+      return LoadBaseDisp(r_base, displacement, r_dest, k32);
     }
     // Load a reference at base + displacement and decompress into register.
-    LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
-      return LoadBaseDisp(r_base, displacement, r_dest, kReference, INVALID_SREG);
+    virtual LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
+      return LoadBaseDisp(r_base, displacement, r_dest, kReference);
     }
     // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
-    RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind);
+    virtual RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind);
     // Load Dalvik value with 64-bit memory storage.
-    RegLocation LoadValueWide(RegLocation rl_src, RegisterClass op_kind);
+    virtual RegLocation LoadValueWide(RegLocation rl_src, RegisterClass op_kind);
     // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
-    void LoadValueDirect(RegLocation rl_src, RegStorage r_dest);
+    virtual void LoadValueDirect(RegLocation rl_src, RegStorage r_dest);
     // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
-    void LoadValueDirectFixed(RegLocation rl_src, RegStorage r_dest);
+    virtual void LoadValueDirectFixed(RegLocation rl_src, RegStorage r_dest);
     // Load Dalvik value with 64-bit memory storage.
-    void LoadValueDirectWide(RegLocation rl_src, RegStorage r_dest);
+    virtual void LoadValueDirectWide(RegLocation rl_src, RegStorage r_dest);
     // Load Dalvik value with 64-bit memory storage.
-    void LoadValueDirectWideFixed(RegLocation rl_src, RegStorage r_dest);
+    virtual void LoadValueDirectWideFixed(RegLocation rl_src, RegStorage r_dest);
     // Store an item of natural word size.
-    LIR* StoreWordDisp(RegStorage r_base, int displacement, RegStorage r_src) {
+    virtual LIR* StoreWordDisp(RegStorage r_base, int displacement, RegStorage r_src) {
       return StoreBaseDisp(r_base, displacement, r_src, kWord);
     }
     // Store an uncompressed reference into a compressed 32-bit container.
-    LIR* StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src) {
+    virtual LIR* StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src) {
       return StoreBaseDisp(r_base, displacement, r_src, kReference);
     }
     // Store 32 bits, regardless of target.
-    LIR* Store32Disp(RegStorage r_base, int displacement, RegStorage r_src) {
+    virtual LIR* Store32Disp(RegStorage r_base, int displacement, RegStorage r_src) {
       return StoreBaseDisp(r_base, displacement, r_src, k32);
     }
 
@@ -769,7 +983,7 @@
      * @param rl_dest The destination dalvik register location.
      * @param rl_src The source register location. Can be either physical register or dalvik register.
      */
-    void StoreValue(RegLocation rl_dest, RegLocation rl_src);
+    virtual void StoreValue(RegLocation rl_dest, RegLocation rl_src);
 
     /**
      * @brief Used to do the final store in a wide destination as per bytecode semantics.
@@ -778,7 +992,7 @@
      * @param rl_src The source register location. Can be either physical register or dalvik
      *  register.
      */
-    void StoreValueWide(RegLocation rl_dest, RegLocation rl_src);
+    virtual void StoreValueWide(RegLocation rl_dest, RegLocation rl_src);
 
     /**
      * @brief Used to do the final store to a destination as per bytecode semantics.
@@ -790,7 +1004,7 @@
      * register value that now needs to be properly registered.  This is used to avoid an
      * extra register copy that would result if StoreValue was called.
      */
-    void StoreFinalValue(RegLocation rl_dest, RegLocation rl_src);
+    virtual void StoreFinalValue(RegLocation rl_dest, RegLocation rl_src);
 
     /**
      * @brief Used to do the final store in a wide destination as per bytecode semantics.
@@ -802,14 +1016,14 @@
      * register values that now need to be properly registered.  This is used to avoid an
      * extra pair of register copies that would result if StoreValueWide was called.
      */
-    void StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src);
+    virtual void StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src);
 
     // Shared by all targets - implemented in mir_to_lir.cc.
     void CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list);
-    void HandleExtendedMethodMIR(BasicBlock* bb, MIR* mir);
+    virtual void HandleExtendedMethodMIR(BasicBlock* bb, MIR* mir);
     bool MethodBlockCodeGen(BasicBlock* bb);
     bool SpecialMIR2LIR(const InlineMethod& special);
-    void MethodMIR2LIR();
+    virtual void MethodMIR2LIR();
     // Update LIR for verbose listings.
     void UpdateLIROffsets();
 
@@ -860,36 +1074,31 @@
                                     RegLocation rl_src, RegLocation rl_dest, int lit) = 0;
     virtual bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) = 0;
     virtual LIR* CheckSuspendUsingLoad() = 0;
+
     virtual RegStorage LoadHelper(ThreadOffset<4> offset) = 0;
-    virtual LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
-                              int s_reg) = 0;
-    virtual LIR* LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest,
-                                  int s_reg) = 0;
+    virtual RegStorage LoadHelper(ThreadOffset<8> offset) = 0;
+
+    virtual LIR* LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest,
+                                      OpSize size) = 0;
+    virtual LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+                              OpSize size) = 0;
     virtual LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
                                  int scale, OpSize size) = 0;
     virtual LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                     int displacement, RegStorage r_dest, RegStorage r_dest_hi,
-                                     OpSize size, int s_reg) = 0;
+                                     int displacement, RegStorage r_dest, OpSize size) = 0;
     virtual LIR* LoadConstantNoClobber(RegStorage r_dest, int value) = 0;
     virtual LIR* LoadConstantWide(RegStorage r_dest, int64_t value) = 0;
+    virtual LIR* StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src,
+                                       OpSize size) = 0;
     virtual LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                OpSize size) = 0;
-    virtual LIR* StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src) = 0;
     virtual LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                                   int scale, OpSize size) = 0;
     virtual LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                      int displacement, RegStorage r_src, RegStorage r_src_hi,
-                                      OpSize size, int s_reg) = 0;
+                                      int displacement, RegStorage r_src, OpSize size) = 0;
     virtual void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) = 0;
 
     // Required for target - register utilities.
-    virtual bool IsFpReg(int reg) = 0;
-    virtual bool IsFpReg(RegStorage reg) = 0;
-    virtual bool SameRegType(int reg1, int reg2) = 0;
-    virtual RegStorage AllocTypedTemp(bool fp_hint, int reg_class) = 0;
-    virtual RegStorage AllocTypedTempWide(bool fp_hint, int reg_class) = 0;
-    // TODO: elminate S2d.
-    virtual int S2d(int low_reg, int high_reg) = 0;
     virtual RegStorage TargetReg(SpecialTargetRegister reg) = 0;
     virtual RegStorage GetArgMappingToPhysicalReg(int arg_num) = 0;
     virtual RegLocation GetReturnAlt() = 0;
@@ -898,17 +1107,13 @@
     virtual RegLocation LocCReturnDouble() = 0;
     virtual RegLocation LocCReturnFloat() = 0;
     virtual RegLocation LocCReturnWide() = 0;
-    // TODO: use to reduce/eliminate xx_FPREG() macro use.
-    virtual uint32_t FpRegMask() = 0;
-    virtual uint64_t GetRegMaskCommon(int reg) = 0;
+    virtual uint64_t GetRegMaskCommon(RegStorage reg) = 0;
     virtual void AdjustSpillMask() = 0;
     virtual void ClobberCallerSave() = 0;
-    virtual void FlushReg(RegStorage reg) = 0;
-    virtual void FlushRegWide(RegStorage reg) = 0;
     virtual void FreeCallTemps() = 0;
-    virtual void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) = 0;
     virtual void LockCallTemps() = 0;
-    virtual void MarkPreservedSingle(int v_reg, int reg) = 0;
+    virtual void MarkPreservedSingle(int v_reg, RegStorage reg) = 0;
+    virtual void MarkPreservedDouble(int v_reg, RegStorage reg) = 0;
     virtual void CompilerInitializeRegAlloc() = 0;
 
     // Required for target - miscellaneous.
@@ -923,6 +1128,11 @@
     virtual int GetInsnSize(LIR* lir) = 0;
     virtual bool IsUnconditionalBranch(LIR* lir) = 0;
 
+    // Check support for volatile load/store of a given size.
+    virtual bool SupportsVolatileLoadStore(OpSize size) = 0;
+    // Get the register class for load/store of a field.
+    virtual RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) = 0;
+
     // Required for target - Dalvik-level generators.
     virtual void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                    RegLocation rl_src1, RegLocation rl_src2) = 0;
@@ -960,6 +1170,7 @@
     virtual bool GenInlinedSqrt(CallInfo* info) = 0;
     virtual bool GenInlinedPeek(CallInfo* info, OpSize size) = 0;
     virtual bool GenInlinedPoke(CallInfo* info, OpSize size) = 0;
+    virtual void GenNotLong(RegLocation rl_dest, RegLocation rl_src) = 0;
     virtual void GenNegLong(RegLocation rl_dest, RegLocation rl_src) = 0;
     virtual void GenOrLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) = 0;
@@ -967,6 +1178,8 @@
                             RegLocation rl_src2) = 0;
     virtual void GenXorLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
+    virtual void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2, bool is_div) = 0;
     virtual RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi,
                                   bool is_div) = 0;
     virtual RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit,
@@ -1006,6 +1219,14 @@
     virtual void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) = 0;
     virtual void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) = 0;
 
+    /*
+     * @brief Handle Machine Specific MIR Extended opcodes.
+     * @param bb The basic block that contains the MIR.
+     * @param mir The MIR whose extended opcode is machine-specific, not a standard one.
+     * @note Base class implementation will abort for unknown opcodes.
+     */
+    virtual void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir);
+
     /**
      * @brief Lowers the kMirOpSelect MIR into LIR.
      * @param bb The basic block in which the MIR is from.
@@ -1020,8 +1241,9 @@
      * barrier, then it will be used as such. Otherwise, a new LIR will be generated
      * that can keep the semantics.
      * @param barrier_kind The kind of memory barrier to generate.
+     * @return whether a new instruction was generated.
      */
-    virtual void GenMemBarrier(MemBarrierKind barrier_kind) = 0;
+    virtual bool GenMemBarrier(MemBarrierKind barrier_kind) = 0;
 
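Editor's note: the change above turns GenMemBarrier() into a bool so callers can tell whether a barrier instruction was actually emitted or an existing LIR already provides the ordering. Below is a minimal standalone sketch of that emit-or-reuse pattern; the types and names are illustrative only, not ART code.

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

enum class Op { kLoad, kStore, kBarrier };

struct Inst { Op op; };

class Emitter {
 public:
  // Returns true only if a new barrier instruction was actually appended.
  bool GenMemBarrier() {
    if (!code_.empty() && code_.back().op == Op::kBarrier) {
      return false;  // The previous instruction already provides the ordering.
    }
    code_.push_back({Op::kBarrier});
    return true;
  }
  void GenStore() { code_.push_back({Op::kStore}); }
  std::size_t Size() const { return code_.size(); }

 private:
  std::vector<Inst> code_;
};

int main() {
  Emitter e;
  e.GenStore();
  bool first = e.GenMemBarrier();   // Emits a barrier.
  bool second = e.GenMemBarrier();  // Reuses it: nothing new generated.
  std::cout << first << second << " " << e.Size() << "\n";  // "10 2"
}
```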
     virtual void GenMoveException(RegLocation rl_dest) = 0;
     virtual void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
@@ -1095,12 +1317,14 @@
                              RegStorage r_src2) = 0;
     virtual LIR* OpTestSuspend(LIR* target) = 0;
     virtual LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) = 0;
+    virtual LIR* OpThreadMem(OpKind op, ThreadOffset<8> thread_offset) = 0;
     virtual LIR* OpVldm(RegStorage r_base, int count) = 0;
     virtual LIR* OpVstm(RegStorage r_base, int count) = 0;
     virtual void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale,
                        int offset) = 0;
     virtual void OpRegCopyWide(RegStorage dest, RegStorage src) = 0;
     virtual void OpTlsCmp(ThreadOffset<4> offset, int val) = 0;
+    virtual void OpTlsCmp(ThreadOffset<8> offset, int val) = 0;
     virtual bool InexpensiveConstantInt(int32_t value) = 0;
     virtual bool InexpensiveConstantFloat(int32_t value) = 0;
     virtual bool InexpensiveConstantLong(int64_t value) = 0;
@@ -1148,14 +1372,18 @@
      * @param loc location of result
      * @returns update location
      */
-    RegLocation ForceTemp(RegLocation loc);
+    virtual RegLocation ForceTemp(RegLocation loc);
 
     /*
      * @brief Force a wide location (in registers) into temporary registers
      * @param loc location of result
      * @returns update location
      */
-    RegLocation ForceTempWide(RegLocation loc);
+    virtual RegLocation ForceTempWide(RegLocation loc);
+
+    static constexpr OpSize LoadStoreOpSize(bool wide, bool ref) {
+      return wide ? k64 : ref ? kReference : k32;
+    }
 
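Editor's note: LoadStoreOpSize() folds the wide/reference flags into a single access size at compile time. A self-contained analogue with a placeholder OpSize enum shows that the mapping can be checked with static_assert:

```cpp
// Placeholder enum; the real OpSize lives in the compiler's own headers.
enum OpSize { k32, k64, kReference };

static constexpr OpSize LoadStoreOpSize(bool wide, bool ref) {
  return wide ? k64 : ref ? kReference : k32;
}

static_assert(LoadStoreOpSize(true, false) == k64, "wide values use 64-bit accesses");
static_assert(LoadStoreOpSize(true, true) == k64, "wide wins over ref");
static_assert(LoadStoreOpSize(false, true) == kReference, "references use the reference size");
static_assert(LoadStoreOpSize(false, false) == k32, "everything else is 32-bit");

int main() { return 0; }
```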
     virtual void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
                                     RegLocation rl_dest, RegLocation rl_src);
@@ -1197,13 +1425,8 @@
      */
     virtual bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
 
-  private:
+  protected:
     void ClobberBody(RegisterInfo* p);
-    void ResetDefBody(RegisterInfo* p) {
-      p->def_start = NULL;
-      p->def_end = NULL;
-    }
-
     void SetCurrentDexPc(DexOffset dexpc) {
       current_dalvik_offset_ = dexpc;
     }
@@ -1224,7 +1447,7 @@
      * @param wide Whether the argument is 64-bit or not.
      * @return Returns the register (or register pair) for the loaded argument.
      */
-    RegStorage LoadArg(int in_position, bool wide = false);
+    RegStorage LoadArg(int in_position, RegisterClass reg_class, bool wide = false);
 
     /**
      * @brief Used to load a VR argument directly to a specified register location.
@@ -1261,7 +1484,14 @@
 
     // Copy arg0 and arg1 to kArg0 and kArg1 safely, possibly using
     // kArg2 as temp.
-    void CopyToArgumentRegs(RegStorage arg0, RegStorage arg1);
+    virtual void CopyToArgumentRegs(RegStorage arg0, RegStorage arg1);
+
+    /**
+     * @brief Load Constant into RegLocation
+     * @param rl_dest Destination RegLocation
+     * @param value Constant value
+     */
+    virtual void GenConst(RegLocation rl_dest, int value);
 
   public:
     // TODO: add accessors for these.
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 6455572..8c0f2bb 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -30,7 +30,7 @@
 void Mir2Lir::ResetRegPool() {
   GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_);
   for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) {
-    info->in_use = false;
+    info->MarkFree();
   }
   // Reset temp tracking sanity check.
   if (kIsDebugBuild) {
@@ -38,67 +38,157 @@
   }
 }
 
- /*
-  * Set up temp & preserved register pools specialized by target.
-  * Note: num_regs may be zero.
-  */
-void Mir2Lir::CompilerInitPool(RegisterInfo* regs, int* reg_nums, int num) {
-  for (int i = 0; i < num; i++) {
-    uint32_t reg_number = reg_nums[i];
-    regs[i].reg = reg_number;
-    regs[i].in_use = false;
-    regs[i].is_temp = false;
-    regs[i].pair = false;
-    regs[i].live = false;
-    regs[i].dirty = false;
-    regs[i].s_reg = INVALID_SREG;
-    size_t map_size = reginfo_map_.Size();
-    if (reg_number >= map_size) {
-      for (uint32_t i = 0; i < ((reg_number - map_size) + 1); i++) {
-        reginfo_map_.Insert(NULL);
-      }
-    }
-    reginfo_map_.Put(reg_number, &regs[i]);
+Mir2Lir::RegisterInfo::RegisterInfo(RegStorage r, uint64_t mask)
+  : reg_(r), is_temp_(false), wide_value_(false), dirty_(false), aliased_(false), partner_(r),
+    s_reg_(INVALID_SREG), def_use_mask_(mask), master_(this), def_start_(nullptr),
+    def_end_(nullptr), alias_chain_(nullptr) {
+  switch (r.StorageSize()) {
+    case 0: storage_mask_ = 0xffffffff; break;
+    case 4: storage_mask_ = 0x00000001; break;
+    case 8: storage_mask_ = 0x00000003; break;
+    case 16: storage_mask_ = 0x0000000f; break;
+    case 32: storage_mask_ = 0x000000ff; break;
+    case 64: storage_mask_ = 0x0000ffff; break;
+    case 128: storage_mask_ = 0xffffffff; break;
   }
+  used_storage_ = r.Valid() ? ~storage_mask_ : storage_mask_;
+  liveness_ = used_storage_;
 }
 
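Editor's note: the new RegisterInfo constructor derives a per-lane storage mask from the register's byte size and seeds the used/liveness words from it. The following is a hedged, standalone sketch of that lane-mask idea; the types and the mask helper are assumptions, not ART's definitions.

```cpp
#include <cassert>
#include <cstdint>

// One bit per 32-bit lane: 4 bytes -> 0x1, 8 -> 0x3, 16 -> 0xf, 64 -> 0xffff.
static uint32_t StorageMask(unsigned byte_size) {
  unsigned lanes = byte_size / 4;
  return lanes >= 32 ? 0xffffffffu : ((1u << lanes) - 1u);
}

struct LaneInfo {
  uint32_t storage_mask;
  uint32_t used;  // Bits set where the storage is unavailable.

  explicit LaneInfo(unsigned byte_size)
      : storage_mask(StorageMask(byte_size)), used(~storage_mask) {}

  void MarkInUse() { used |= storage_mask; }
  void MarkFree() { used &= ~storage_mask; }
  bool InUse() const { return (used & storage_mask) != 0; }
};

int main() {
  LaneInfo single(4), wide(8);
  assert(!single.InUse() && !wide.InUse());
  wide.MarkInUse();
  assert(wide.InUse());
  wide.MarkFree();
  assert(!wide.InUse());
  return 0;
}
```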
-void Mir2Lir::DumpRegPool(RegisterInfo* p, int num_regs) {
+Mir2Lir::RegisterPool::RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena,
+                                    const std::vector<RegStorage>& core_regs,
+                                    const std::vector<RegStorage>& core64_regs,
+                                    const std::vector<RegStorage>& sp_regs,
+                                    const std::vector<RegStorage>& dp_regs,
+                                    const std::vector<RegStorage>& reserved_regs,
+                                    const std::vector<RegStorage>& reserved64_regs,
+                                    const std::vector<RegStorage>& core_temps,
+                                    const std::vector<RegStorage>& core64_temps,
+                                    const std::vector<RegStorage>& sp_temps,
+                                    const std::vector<RegStorage>& dp_temps) :
+    core_regs_(arena, core_regs.size()), next_core_reg_(0),
+    core64_regs_(arena, core64_regs.size()), next_core64_reg_(0),
+    sp_regs_(arena, sp_regs.size()), next_sp_reg_(0),
+    dp_regs_(arena, dp_regs.size()), next_dp_reg_(0), m2l_(m2l)  {
+  // Initialize the fast lookup map.
+  m2l_->reginfo_map_.Reset();
+  if (kIsDebugBuild) {
+    m2l_->reginfo_map_.Resize(RegStorage::kMaxRegs);
+    for (unsigned i = 0; i < RegStorage::kMaxRegs; i++) {
+      m2l_->reginfo_map_.Insert(nullptr);
+    }
+  } else {
+    m2l_->reginfo_map_.SetSize(RegStorage::kMaxRegs);
+  }
+
+  // Construct the register pool.
+  for (RegStorage reg : core_regs) {
+    RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg));
+    m2l_->reginfo_map_.Put(reg.GetReg(), info);
+    core_regs_.Insert(info);
+  }
+  for (RegStorage reg : core64_regs) {
+    RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg));
+    m2l_->reginfo_map_.Put(reg.GetReg(), info);
+    core64_regs_.Insert(info);
+  }
+  for (RegStorage reg : sp_regs) {
+    RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg));
+    m2l_->reginfo_map_.Put(reg.GetReg(), info);
+    sp_regs_.Insert(info);
+  }
+  for (RegStorage reg : dp_regs) {
+    RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg));
+    m2l_->reginfo_map_.Put(reg.GetReg(), info);
+    dp_regs_.Insert(info);
+  }
+
+  // Keep special registers from being allocated.
+  for (RegStorage reg : reserved_regs) {
+    m2l_->MarkInUse(reg);
+  }
+  for (RegStorage reg : reserved64_regs) {
+    m2l_->MarkInUse(reg);
+  }
+
+  // Mark temp regs - all others not in use can be used for promotion
+  for (RegStorage reg : core_temps) {
+    m2l_->MarkTemp(reg);
+  }
+  for (RegStorage reg : core64_temps) {
+    m2l_->MarkTemp(reg);
+  }
+  for (RegStorage reg : sp_temps) {
+    m2l_->MarkTemp(reg);
+  }
+  for (RegStorage reg : dp_temps) {
+    m2l_->MarkTemp(reg);
+  }
+
+  // Add an entry for InvalidReg with zero'd mask.
+  RegisterInfo* invalid_reg = new (arena) RegisterInfo(RegStorage::InvalidReg(), 0);
+  m2l_->reginfo_map_.Put(RegStorage::InvalidReg().GetReg(), invalid_reg);
+}
+
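Editor's note: the RegisterPool constructor follows a simple order: create one tracking record per target register, index the records in a fast lookup map, then mark reserved registers as in use and temps as allocatable. A simplified sketch of that construction pattern, with invented types:

```cpp
#include <cstdint>
#include <unordered_map>
#include <vector>

struct Reg { uint16_t encoding; };

struct Info {
  Reg reg;
  bool is_temp = false;  // Allocatable as a temp.
  bool in_use = false;   // Reserved or currently claimed.
};

class Pool {
 public:
  Pool(const std::vector<Reg>& core, const std::vector<Reg>& reserved,
       const std::vector<Reg>& temps) {
    infos_.reserve(core.size());  // Keep the pointers stored in map_ stable.
    for (Reg r : core) {
      infos_.push_back(Info{r});
      map_[r.encoding] = &infos_.back();                       // Fast lookup map.
    }
    for (Reg r : reserved) map_.at(r.encoding)->in_use = true;  // Never allocate.
    for (Reg r : temps) map_.at(r.encoding)->is_temp = true;    // Mark temps.
  }
  Info* Lookup(Reg r) { return map_.at(r.encoding); }

 private:
  std::vector<Info> infos_;
  std::unordered_map<uint16_t, Info*> map_;
};

int main() {
  Pool pool({{0}, {1}, {2}}, /*reserved=*/{{2}}, /*temps=*/{{0}, {1}});
  return pool.Lookup({2})->in_use ? 0 : 1;
}
```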
+void Mir2Lir::DumpRegPool(GrowableArray<RegisterInfo*>* regs) {
   LOG(INFO) << "================================================";
-  for (int i = 0; i < num_regs; i++) {
+  GrowableArray<RegisterInfo*>::Iterator it(regs);
+  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
     LOG(INFO) << StringPrintf(
-        "R[%d]: T:%d, U:%d, P:%d, p:%d, LV:%d, D:%d, SR:%d",
-        p[i].reg, p[i].is_temp, p[i].in_use, p[i].pair, p[i].partner,
-        p[i].live, p[i].dirty, p[i].s_reg);
+        "R[%d:%d:%c]: T:%d, U:%d, W:%d, p:%d, LV:%d, D:%d, SR:%d, DEF:%d",
+        info->GetReg().GetReg(), info->GetReg().GetRegNum(), info->GetReg().IsFloat() ?  'f' : 'c',
+        info->IsTemp(), info->InUse(), info->IsWide(), info->Partner().GetReg(), info->IsLive(),
+        info->IsDirty(), info->SReg(), info->DefStart() != nullptr);
   }
   LOG(INFO) << "================================================";
 }
 
 void Mir2Lir::DumpCoreRegPool() {
-  DumpRegPool(reg_pool_->core_regs, reg_pool_->num_core_regs);
+  DumpRegPool(&reg_pool_->core_regs_);
 }
 
 void Mir2Lir::DumpFpRegPool() {
-  DumpRegPool(reg_pool_->FPRegs, reg_pool_->num_fp_regs);
+  DumpRegPool(&reg_pool_->sp_regs_);
+  DumpRegPool(&reg_pool_->dp_regs_);
+}
+
+void Mir2Lir::DumpRegPools() {
+  LOG(INFO) << "Core registers";
+  DumpCoreRegPool();
+  LOG(INFO) << "FP registers";
+  DumpFpRegPool();
 }
 
 void Mir2Lir::Clobber(RegStorage reg) {
-  if (reg.IsPair()) {
-    ClobberBody(GetRegInfo(reg.GetLowReg()));
-    ClobberBody(GetRegInfo(reg.GetHighReg()));
+  if (UNLIKELY(reg.IsPair())) {
+    DCHECK(!GetRegInfo(reg.GetLow())->IsAliased());
+    Clobber(reg.GetLow());
+    DCHECK(!GetRegInfo(reg.GetHigh())->IsAliased());
+    Clobber(reg.GetHigh());
   } else {
-    ClobberBody(GetRegInfo(reg.GetReg()));
+    RegisterInfo* info = GetRegInfo(reg);
+    if (info->IsTemp() && !info->IsDead()) {
+      if (info->GetReg() != info->Partner()) {
+        ClobberBody(GetRegInfo(info->Partner()));
+      }
+      ClobberBody(info);
+      if (info->IsAliased()) {
+        ClobberAliases(info);
+      } else {
+        RegisterInfo* master = info->Master();
+        if (info != master) {
+          ClobberBody(info->Master());
+        }
+      }
+    }
   }
 }
 
-void Mir2Lir::ClobberSRegBody(RegisterInfo* p, int num_regs, int s_reg) {
-  for (int i = 0; i< num_regs; i++) {
-    if (p[i].s_reg == s_reg) {
-      if (p[i].is_temp) {
-        p[i].live = false;
-      }
-      p[i].def_start = NULL;
-      p[i].def_end = NULL;
-    }
+void Mir2Lir::ClobberAliases(RegisterInfo* info) {
+  for (RegisterInfo* alias = info->GetAliasChain(); alias != nullptr;
+       alias = alias->GetAliasChain()) {
+    DCHECK(!alias->IsAliased());  // Only the master should be marked as aliased.
+    ClobberBody(alias);
   }
 }
 
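Editor's note: ClobberAliases() walks the master's singly linked alias chain and invalidates every member. A minimal sketch of that walk, assuming a plain node type:

```cpp
#include <cassert>

struct Node {
  bool live = false;
  Node* alias_chain = nullptr;  // Next alias of the same storage, or nullptr.
};

static void ClobberBody(Node* n) { n->live = false; }

// Only a master heads a chain; every alias hanging off it gets invalidated.
static void ClobberAliases(Node* master) {
  for (Node* alias = master->alias_chain; alias != nullptr; alias = alias->alias_chain) {
    ClobberBody(alias);
  }
}

int main() {
  Node master, a1, a2;
  master.alias_chain = &a1;
  a1.alias_chain = &a2;
  a1.live = a2.live = true;
  ClobberAliases(&master);
  assert(!a1.live && !a2.live);
  return 0;
}
```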
@@ -114,14 +204,25 @@
  * addressed.
  */
 void Mir2Lir::ClobberSReg(int s_reg) {
-  /* Reset live temp tracking sanity checker */
-  if (kIsDebugBuild) {
-    if (s_reg == live_sreg_) {
+  if (s_reg != INVALID_SREG) {
+    if (kIsDebugBuild && s_reg == live_sreg_) {
       live_sreg_ = INVALID_SREG;
     }
+    GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_);
+    for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) {
+      if (info->SReg() == s_reg) {
+        if (info->GetReg() != info->Partner()) {
+          // Dealing with a pair - clobber the other half.
+          DCHECK(!info->IsAliased());
+          ClobberBody(GetRegInfo(info->Partner()));
+        }
+        ClobberBody(info);
+        if (info->IsAliased()) {
+          ClobberAliases(info);
+        }
+      }
+    }
   }
-  ClobberSRegBody(reg_pool_->core_regs, reg_pool_->num_core_regs, s_reg);
-  ClobberSRegBody(reg_pool_->FPRegs, reg_pool_->num_fp_regs, s_reg);
 }
 
 /*
@@ -153,11 +254,12 @@
   }
 }
 
+// TODO: refactor following Alloc/Record routines - much commonality.
 void Mir2Lir::RecordCorePromotion(RegStorage reg, int s_reg) {
   int p_map_idx = SRegToPMap(s_reg);
   int v_reg = mir_graph_->SRegToVReg(s_reg);
-  int reg_num = reg.GetReg();
-  GetRegInfo(reg_num)->in_use = true;
+  int reg_num = reg.GetRegNum();
+  GetRegInfo(reg)->MarkInUse();
   core_spill_mask_ |= (1 << reg_num);
   // Include reg for later sort
   core_vmap_table_.push_back(reg_num << VREG_NUM_WIDTH | (v_reg & ((1 << VREG_NUM_WIDTH) - 1)));
@@ -166,13 +268,13 @@
   promotion_map_[p_map_idx].core_reg = reg_num;
 }
 
-/* Reserve a callee-save register.  Return -1 if none available */
+/* Reserve a callee-save register.  Return InvalidReg if none available */
 RegStorage Mir2Lir::AllocPreservedCoreReg(int s_reg) {
   RegStorage res;
-  RegisterInfo* core_regs = reg_pool_->core_regs;
-  for (int i = 0; i < reg_pool_->num_core_regs; i++) {
-    if (!core_regs[i].is_temp && !core_regs[i].in_use) {
-      res = RegStorage::Solo32(core_regs[i].reg);
+  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->core_regs_);
+  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
+    if (!info->IsTemp() && !info->InUse()) {
+      res = info->GetReg();
       RecordCorePromotion(res, s_reg);
       break;
     }
@@ -180,302 +282,299 @@
   return res;
 }
 
-void Mir2Lir::RecordFpPromotion(RegStorage reg, int s_reg) {
+void Mir2Lir::RecordSinglePromotion(RegStorage reg, int s_reg) {
   int p_map_idx = SRegToPMap(s_reg);
   int v_reg = mir_graph_->SRegToVReg(s_reg);
-  int reg_num = reg.GetReg();
-  GetRegInfo(reg_num)->in_use = true;
-  MarkPreservedSingle(v_reg, reg_num);
+  GetRegInfo(reg)->MarkInUse();
+  MarkPreservedSingle(v_reg, reg);
   promotion_map_[p_map_idx].fp_location = kLocPhysReg;
-  promotion_map_[p_map_idx].FpReg = reg_num;
+  promotion_map_[p_map_idx].FpReg = reg.GetReg();
 }
 
-// Reserve a callee-save fp single register.
+// Reserve a callee-save sp single register.
 RegStorage Mir2Lir::AllocPreservedSingle(int s_reg) {
   RegStorage res;
-  RegisterInfo* FPRegs = reg_pool_->FPRegs;
-  for (int i = 0; i < reg_pool_->num_fp_regs; i++) {
-    if (!FPRegs[i].is_temp && !FPRegs[i].in_use) {
-      res = RegStorage::Solo32(FPRegs[i].reg);
-      RecordFpPromotion(res, s_reg);
+  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
+  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
+    if (!info->IsTemp() && !info->InUse()) {
+      res = info->GetReg();
+      RecordSinglePromotion(res, s_reg);
       break;
     }
   }
   return res;
 }
 
-/*
- * Somewhat messy code here.  We want to allocate a pair of contiguous
- * physical single-precision floating point registers starting with
- * an even numbered reg.  It is possible that the paired s_reg (s_reg+1)
- * has already been allocated - try to fit if possible.  Fail to
- * allocate if we can't meet the requirements for the pair of
- * s_reg<=sX[even] & (s_reg+1)<= sX+1.
- */
-// TODO: needs rewrite to support non-backed 64-bit float regs.
+void Mir2Lir::RecordDoublePromotion(RegStorage reg, int s_reg) {
+  int p_map_idx = SRegToPMap(s_reg);
+  int v_reg = mir_graph_->SRegToVReg(s_reg);
+  GetRegInfo(reg)->MarkInUse();
+  MarkPreservedDouble(v_reg, reg);
+  promotion_map_[p_map_idx].fp_location = kLocPhysReg;
+  promotion_map_[p_map_idx].FpReg = reg.GetReg();
+}
+
+// Reserve a callee-save dp solo register.
 RegStorage Mir2Lir::AllocPreservedDouble(int s_reg) {
   RegStorage res;
-  int v_reg = mir_graph_->SRegToVReg(s_reg);
-  int p_map_idx = SRegToPMap(s_reg);
-  if (promotion_map_[p_map_idx+1].fp_location == kLocPhysReg) {
-    // Upper reg is already allocated.  Can we fit?
-    int high_reg = promotion_map_[p_map_idx+1].FpReg;
-    if ((high_reg & 1) == 0) {
-      // High reg is even - fail.
-      return res;  // Invalid.
+  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->dp_regs_);
+  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
+    if (!info->IsTemp() && !info->InUse()) {
+      res = info->GetReg();
+      RecordDoublePromotion(res, s_reg);
+      break;
     }
-    // Is the low reg of the pair free?
-    RegisterInfo* p = GetRegInfo(high_reg-1);
-    if (p->in_use || p->is_temp) {
-      // Already allocated or not preserved - fail.
-      return res;  // Invalid.
-    }
-    // OK - good to go.
-    res = RegStorage(RegStorage::k64BitPair, p->reg, p->reg + 1);
-    p->in_use = true;
-    DCHECK_EQ((res.GetReg() & 1), 0);
-    MarkPreservedSingle(v_reg, res.GetReg());
-  } else {
-    RegisterInfo* FPRegs = reg_pool_->FPRegs;
-    for (int i = 0; i < reg_pool_->num_fp_regs; i++) {
-      if (!FPRegs[i].is_temp && !FPRegs[i].in_use &&
-        ((FPRegs[i].reg & 0x1) == 0x0) &&
-        !FPRegs[i+1].is_temp && !FPRegs[i+1].in_use &&
-        ((FPRegs[i+1].reg & 0x1) == 0x1) &&
-        (FPRegs[i].reg + 1) == FPRegs[i+1].reg) {
-        res = RegStorage(RegStorage::k64BitPair, FPRegs[i].reg, FPRegs[i].reg+1);
-        FPRegs[i].in_use = true;
-        MarkPreservedSingle(v_reg, res.GetLowReg());
-        FPRegs[i+1].in_use = true;
-        DCHECK_EQ(res.GetLowReg() + 1, FPRegs[i+1].reg);
-        MarkPreservedSingle(v_reg+1, res.GetLowReg() + 1);
-        break;
-      }
-    }
-  }
-  if (res.Valid()) {
-    promotion_map_[p_map_idx].fp_location = kLocPhysReg;
-    promotion_map_[p_map_idx].FpReg = res.GetLowReg();
-    promotion_map_[p_map_idx+1].fp_location = kLocPhysReg;
-    promotion_map_[p_map_idx+1].FpReg = res.GetLowReg() + 1;
   }
   return res;
 }
 
-RegStorage Mir2Lir::AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp,
-                                  bool required) {
+
+RegStorage Mir2Lir::AllocTempBody(GrowableArray<RegisterInfo*>& regs, int* next_temp,
+                                  bool required) {
+  int num_regs = regs.Size();
   int next = *next_temp;
   for (int i = 0; i< num_regs; i++) {
     if (next >= num_regs)
       next = 0;
-    if (p[next].is_temp && !p[next].in_use && !p[next].live) {
-      Clobber(p[next].reg);
-      p[next].in_use = true;
-      p[next].pair = false;
+    RegisterInfo* info = regs.Get(next);
+    // Try to allocate a register that doesn't hold a live value.
+    if (info->IsTemp() && !info->InUse() && info->IsDead()) {
+      Clobber(info->GetReg());
+      info->MarkInUse();
+      /*
+       * NOTE: "wideness" is an attribute of how the container is used, not its physical size.
+       * The caller will set wideness as appropriate.
+       */
+      info->SetIsWide(false);
       *next_temp = next + 1;
-      return RegStorage::Solo32(p[next].reg);
+      return info->GetReg();
     }
     next++;
   }
   next = *next_temp;
+  // No free non-live regs.  Anything we can kill?
   for (int i = 0; i< num_regs; i++) {
     if (next >= num_regs)
       next = 0;
-    if (p[next].is_temp && !p[next].in_use) {
-      Clobber(p[next].reg);
-      p[next].in_use = true;
-      p[next].pair = false;
+    RegisterInfo* info = regs.Get(next);
+    if (info->IsTemp() && !info->InUse()) {
+      // Got one.  Kill it.
+      ClobberSReg(info->SReg());
+      Clobber(info->GetReg());
+      info->MarkInUse();
+      if (info->IsWide()) {
+        RegisterInfo* partner = GetRegInfo(info->Partner());
+        DCHECK_EQ(info->GetReg().GetRegNum(), partner->Partner().GetRegNum());
+        DCHECK(partner->IsWide());
+        info->SetIsWide(false);
+        partner->SetIsWide(false);
+      }
       *next_temp = next + 1;
-      return RegStorage::Solo32(p[next].reg);
+      return info->GetReg();
     }
     next++;
   }
   if (required) {
     CodegenDump();
-    DumpRegPool(reg_pool_->core_regs,
-          reg_pool_->num_core_regs);
+    DumpRegPools();
     LOG(FATAL) << "No free temp registers";
   }
   return RegStorage::InvalidReg();  // No register available
 }
 
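Editor's note: AllocTempBody() now makes two round-robin passes: prefer a temp that is free and holds no live value, and only fall back to killing a live one. A standalone sketch of that policy, with an illustrative Temp type rather than the real RegisterInfo:

```cpp
#include <vector>

struct Temp {
  bool in_use = false;
  bool live = false;
};

// Returns the index of the chosen temp, or -1 if every temp is in use.
static int AllocTemp(std::vector<Temp>& regs, int* next) {
  const int n = static_cast<int>(regs.size());
  for (int pass = 0; pass < 2; ++pass) {
    int idx = *next;
    for (int i = 0; i < n; ++i, ++idx) {
      if (idx >= n) idx = 0;
      Temp& t = regs[idx];
      bool ok = (pass == 0) ? (!t.in_use && !t.live)  // Prefer regs holding no live value.
                            : !t.in_use;              // Otherwise kill whatever is there.
      if (ok) {
        t.live = false;   // Clobber any stale value.
        t.in_use = true;
        *next = idx + 1;  // Round-robin start point for the next request.
        return idx;
      }
    }
  }
  return -1;  // The caller decides whether running dry is fatal.
}

int main() {
  std::vector<Temp> regs(3);
  regs[0].live = regs[1].live = true;
  int next = 0;
  return AllocTemp(regs, &next) == 2 ? 0 : 1;  // Index 2 is the only dead, free temp.
}
```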
-// REDO: too many assumptions.
-// Virtualize - this is target dependent.
-RegStorage Mir2Lir::AllocTempDouble() {
-  RegisterInfo* p = reg_pool_->FPRegs;
-  int num_regs = reg_pool_->num_fp_regs;
-  /* Start looking at an even reg */
-  int next = reg_pool_->next_fp_reg & ~0x1;
-
-  // First try to avoid allocating live registers
-  for (int i = 0; i < num_regs; i+=2) {
-    if (next >= num_regs)
-      next = 0;
-    if ((p[next].is_temp && !p[next].in_use && !p[next].live) &&
-      (p[next+1].is_temp && !p[next+1].in_use && !p[next+1].live)) {
-      Clobber(p[next].reg);
-      Clobber(p[next+1].reg);
-      p[next].in_use = true;
-      p[next+1].in_use = true;
-      DCHECK_EQ((p[next].reg+1), p[next+1].reg);
-      DCHECK_EQ((p[next].reg & 0x1), 0);
-      reg_pool_->next_fp_reg = next + 2;
-      if (reg_pool_->next_fp_reg >= num_regs) {
-        reg_pool_->next_fp_reg = 0;
-      }
-      // FIXME: should return k64BitSolo.
-      return RegStorage(RegStorage::k64BitPair, p[next].reg, p[next+1].reg);
-    }
-    next += 2;
-  }
-  next = reg_pool_->next_fp_reg & ~0x1;
-
-  // No choice - find a pair and kill it.
-  for (int i = 0; i < num_regs; i+=2) {
-    if (next >= num_regs)
-      next = 0;
-    if (p[next].is_temp && !p[next].in_use && p[next+1].is_temp &&
-      !p[next+1].in_use) {
-      Clobber(p[next].reg);
-      Clobber(p[next+1].reg);
-      p[next].in_use = true;
-      p[next+1].in_use = true;
-      DCHECK_EQ((p[next].reg+1), p[next+1].reg);
-      DCHECK_EQ((p[next].reg & 0x1), 0);
-      reg_pool_->next_fp_reg = next + 2;
-      if (reg_pool_->next_fp_reg >= num_regs) {
-        reg_pool_->next_fp_reg = 0;
-      }
-      return RegStorage(RegStorage::k64BitPair, p[next].reg, p[next+1].reg);
-    }
-    next += 2;
-  }
-  LOG(FATAL) << "No free temp registers (pair)";
-  return RegStorage::InvalidReg();
-}
-
 /* Return a temp if one is available, -1 otherwise */
 RegStorage Mir2Lir::AllocFreeTemp() {
-  return AllocTempBody(reg_pool_->core_regs,
-             reg_pool_->num_core_regs,
-             &reg_pool_->next_core_reg, false);
+  return AllocTempBody(reg_pool_->core_regs_, &reg_pool_->next_core_reg_, false);
 }
 
 RegStorage Mir2Lir::AllocTemp() {
-  return AllocTempBody(reg_pool_->core_regs,
-             reg_pool_->num_core_regs,
-             &reg_pool_->next_core_reg, true);
+  return AllocTempBody(reg_pool_->core_regs_, &reg_pool_->next_core_reg_, true);
 }
 
-RegStorage Mir2Lir::AllocTempFloat() {
-  return AllocTempBody(reg_pool_->FPRegs,
-             reg_pool_->num_fp_regs,
-             &reg_pool_->next_fp_reg, true);
-}
-
-Mir2Lir::RegisterInfo* Mir2Lir::AllocLiveBody(RegisterInfo* p, int num_regs, int s_reg) {
-  if (s_reg == -1)
-    return NULL;
-  for (int i = 0; i < num_regs; i++) {
-    if ((p[i].s_reg == s_reg) && p[i].live) {
-      if (p[i].is_temp)
-        p[i].in_use = true;
-      return &p[i];
-    }
-  }
-  return NULL;
-}
-
-Mir2Lir::RegisterInfo* Mir2Lir::AllocLive(int s_reg, int reg_class) {
-  RegisterInfo* res = NULL;
-  switch (reg_class) {
-    case kAnyReg:
-      res = AllocLiveBody(reg_pool_->FPRegs,
-                reg_pool_->num_fp_regs, s_reg);
-      if (res)
-        break;
-      /* Intentional fallthrough */
-    case kCoreReg:
-      res = AllocLiveBody(reg_pool_->core_regs,
-                reg_pool_->num_core_regs, s_reg);
-      break;
-    case kFPReg:
-      res = AllocLiveBody(reg_pool_->FPRegs,
-                reg_pool_->num_fp_regs, s_reg);
-      break;
-    default:
-      LOG(FATAL) << "Invalid register type";
+RegStorage Mir2Lir::AllocTempWide() {
+  RegStorage res;
+  if (reg_pool_->core64_regs_.Size() != 0) {
+    res = AllocTempBody(reg_pool_->core64_regs_, &reg_pool_->next_core64_reg_, true);
+  } else {
+    RegStorage low_reg = AllocTemp();
+    RegStorage high_reg = AllocTemp();
+    res = RegStorage::MakeRegPair(low_reg, high_reg);
   }
   return res;
 }
 
-void Mir2Lir::FreeTemp(int reg) {
-  RegisterInfo* p = GetRegInfo(reg);
-  if (p->is_temp) {
-    p->in_use = false;
+RegStorage Mir2Lir::AllocTempSingle() {
+  RegStorage res = AllocTempBody(reg_pool_->sp_regs_, &reg_pool_->next_sp_reg_, true);
+  DCHECK(res.IsSingle()) << "Reg: 0x" << std::hex << res.GetRawBits();
+  return res;
+}
+
+RegStorage Mir2Lir::AllocTempDouble() {
+  RegStorage res = AllocTempBody(reg_pool_->dp_regs_, &reg_pool_->next_dp_reg_, true);
+  DCHECK(res.IsDouble()) << "Reg: 0x" << std::hex << res.GetRawBits();
+  return res;
+}
+
+RegStorage Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
+  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
+    return AllocTempDouble();
   }
-  p->pair = false;
+  return AllocTempWide();
+}
+
+RegStorage Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
+  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
+    return AllocTempSingle();
+  }
+  return AllocTemp();
+}
+
+RegStorage Mir2Lir::FindLiveReg(GrowableArray<RegisterInfo*>& regs, int s_reg) {
+  RegStorage res;
+  GrowableArray<RegisterInfo*>::Iterator it(&regs);
+  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
+    if ((info->SReg() == s_reg) && info->IsLive()) {
+      res = info->GetReg();
+      break;
+    }
+  }
+  return res;
+}
+
+RegStorage Mir2Lir::AllocLiveReg(int s_reg, int reg_class, bool wide) {
+  RegStorage reg;
+  // TODO: might be worth a sanity check here to verify at most 1 live reg per s_reg.
+  if ((reg_class == kAnyReg) || (reg_class == kFPReg)) {
+    reg = FindLiveReg(wide ? reg_pool_->dp_regs_ : reg_pool_->sp_regs_, s_reg);
+  }
+  if (!reg.Valid() && (reg_class != kFPReg)) {
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      reg = FindLiveReg(wide ? reg_pool_->core64_regs_ : reg_pool_->core_regs_, s_reg);
+    } else {
+      reg = FindLiveReg(reg_pool_->core_regs_, s_reg);
+    }
+  }
+  if (reg.Valid()) {
+    if (wide && !reg.IsFloat() && !Is64BitInstructionSet(cu_->instruction_set)) {
+      // Only allow reg pairs for core regs on 32-bit targets.
+      RegStorage high_reg = FindLiveReg(reg_pool_->core_regs_, s_reg + 1);
+      if (high_reg.Valid()) {
+        reg = RegStorage::MakeRegPair(reg, high_reg);
+        MarkWide(reg);
+      } else {
+        // Only half available.
+        reg = RegStorage::InvalidReg();
+      }
+    }
+    if (reg.Valid() && (wide != GetRegInfo(reg)->IsWide())) {
+      // Width mismatch - don't try to reuse.
+      reg = RegStorage::InvalidReg();
+    }
+  }
+  if (reg.Valid()) {
+    if (reg.IsPair()) {
+      RegisterInfo* info_low = GetRegInfo(reg.GetLow());
+      RegisterInfo* info_high = GetRegInfo(reg.GetHigh());
+      if (info_low->IsTemp()) {
+        info_low->MarkInUse();
+      }
+      if (info_high->IsTemp()) {
+        info_high->MarkInUse();
+      }
+    } else {
+      RegisterInfo* info = GetRegInfo(reg);
+      if (info->IsTemp()) {
+        info->MarkInUse();
+      }
+    }
+  } else {
+    // Either not found, or something didn't match up. Clobber to prevent any stale instances.
+    ClobberSReg(s_reg);
+    if (wide) {
+      ClobberSReg(s_reg + 1);
+    }
+  }
+  return reg;
 }
 
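Editor's note: AllocLiveReg() tries to rebuild a wide value from two live halves on 32-bit targets and refuses to reuse anything whose recorded width does not match the request. A hedged sketch of that reuse check, using an invented sreg-to-register map:

```cpp
#include <map>
#include <optional>

struct LiveReg { int reg_num; bool wide; };

// sreg -> currently live physical register (illustrative tracking structure).
using LiveMap = std::map<int, LiveReg>;

struct Pair { int low; int high; };

static std::optional<Pair> TryReuseWide(const LiveMap& live, int s_reg) {
  auto lo = live.find(s_reg);
  auto hi = live.find(s_reg + 1);
  if (lo == live.end() || hi == live.end()) {
    return std::nullopt;  // Only half (or nothing) is live: caller must reload.
  }
  if (!lo->second.wide || !hi->second.wide) {
    return std::nullopt;  // Width mismatch: never reuse stale narrow values.
  }
  return Pair{lo->second.reg_num, hi->second.reg_num};
}

int main() {
  LiveMap live = {{4, {2, true}}, {5, {3, true}}};  // v4/v5 live in regs 2 and 3.
  return TryReuseWide(live, 4).has_value() ? 0 : 1;
}
```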
 void Mir2Lir::FreeTemp(RegStorage reg) {
   if (reg.IsPair()) {
-    FreeTemp(reg.GetLowReg());
-    FreeTemp(reg.GetHighReg());
+    FreeTemp(reg.GetLow());
+    FreeTemp(reg.GetHigh());
   } else {
-    FreeTemp(reg.GetReg());
+    RegisterInfo* p = GetRegInfo(reg);
+    if (p->IsTemp()) {
+      p->MarkFree();
+      p->SetIsWide(false);
+      p->SetPartner(reg);
+    }
   }
 }
 
-Mir2Lir::RegisterInfo* Mir2Lir::IsLive(int reg) {
-  RegisterInfo* p = GetRegInfo(reg);
-  return p->live ? p : NULL;
+void Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
+  DCHECK(rl_keep.wide);
+  DCHECK(rl_free.wide);
+  int free_low = rl_free.reg.GetLowReg();
+  int free_high = rl_free.reg.GetHighReg();
+  int keep_low = rl_keep.reg.GetLowReg();
+  int keep_high = rl_keep.reg.GetHighReg();
+  if ((free_low != keep_low) && (free_low != keep_high) &&
+      (free_high != keep_low) && (free_high != keep_high)) {
+    // No overlap, free both
+    FreeTemp(rl_free.reg);
+  }
 }
 
 bool Mir2Lir::IsLive(RegStorage reg) {
+  bool res;
   if (reg.IsPair()) {
-    return IsLive(reg.GetLowReg()) || IsLive(reg.GetHighReg());
+    RegisterInfo* p_lo = GetRegInfo(reg.GetLow());
+    RegisterInfo* p_hi = GetRegInfo(reg.GetHigh());
+    DCHECK_EQ(p_lo->IsLive(), p_hi->IsLive());
+    res = p_lo->IsLive() || p_hi->IsLive();
   } else {
-    return IsLive(reg.GetReg());
+    RegisterInfo* p = GetRegInfo(reg);
+    res = p->IsLive();
   }
-}
-
-Mir2Lir::RegisterInfo* Mir2Lir::IsTemp(int reg) {
-  RegisterInfo* p = GetRegInfo(reg);
-  return (p->is_temp) ? p : NULL;
+  return res;
 }
 
 bool Mir2Lir::IsTemp(RegStorage reg) {
+  bool res;
   if (reg.IsPair()) {
-    return IsTemp(reg.GetLowReg()) || IsTemp(reg.GetHighReg());
+    RegisterInfo* p_lo = GetRegInfo(reg.GetLow());
+    RegisterInfo* p_hi = GetRegInfo(reg.GetHigh());
+    res = p_lo->IsTemp() || p_hi->IsTemp();
   } else {
-    return IsTemp(reg.GetReg());
+    RegisterInfo* p = GetRegInfo(reg);
+    res = p->IsTemp();
   }
-}
-
-Mir2Lir::RegisterInfo* Mir2Lir::IsPromoted(int reg) {
-  RegisterInfo* p = GetRegInfo(reg);
-  return (p->is_temp) ? NULL : p;
+  return res;
 }
 
 bool Mir2Lir::IsPromoted(RegStorage reg) {
+  bool res;
   if (reg.IsPair()) {
-    return IsPromoted(reg.GetLowReg()) || IsPromoted(reg.GetHighReg());
+    RegisterInfo* p_lo = GetRegInfo(reg.GetLow());
+    RegisterInfo* p_hi = GetRegInfo(reg.GetHigh());
+    res = !p_lo->IsTemp() || !p_hi->IsTemp();
   } else {
-    return IsPromoted(reg.GetReg());
+    RegisterInfo* p = GetRegInfo(reg);
+    res = !p->IsTemp();
   }
-}
-
-bool Mir2Lir::IsDirty(int reg) {
-  RegisterInfo* p = GetRegInfo(reg);
-  return p->dirty;
+  return res;
 }
 
 bool Mir2Lir::IsDirty(RegStorage reg) {
+  bool res;
   if (reg.IsPair()) {
-    return IsDirty(reg.GetLowReg()) || IsDirty(reg.GetHighReg());
+    RegisterInfo* p_lo = GetRegInfo(reg.GetLow());
+    RegisterInfo* p_hi = GetRegInfo(reg.GetHigh());
+    res = p_lo->IsDirty() || p_hi->IsDirty();
   } else {
-    return IsDirty(reg.GetReg());
+    RegisterInfo* p = GetRegInfo(reg);
+    res = p->IsDirty();
   }
+  return res;
 }
 
 /*
@@ -483,35 +582,44 @@
  * register.  No check is made to see if the register was previously
  * allocated.  Use with caution.
  */
-void Mir2Lir::LockTemp(int reg) {
-  RegisterInfo* p = GetRegInfo(reg);
-  DCHECK(p->is_temp);
-  p->in_use = true;
-  p->live = false;
-}
-
 void Mir2Lir::LockTemp(RegStorage reg) {
-  DCHECK(!reg.IsPair());
-  LockTemp(reg.GetReg());
-}
-
-void Mir2Lir::ResetDef(int reg) {
-  ResetDefBody(GetRegInfo(reg));
+  DCHECK(IsTemp(reg));
+  if (reg.IsPair()) {
+    RegisterInfo* p_lo = GetRegInfo(reg.GetLow());
+    RegisterInfo* p_hi = GetRegInfo(reg.GetHigh());
+    p_lo->MarkInUse();
+    p_lo->MarkDead();
+    p_hi->MarkInUse();
+    p_hi->MarkDead();
+  } else {
+    RegisterInfo* p = GetRegInfo(reg);
+    p->MarkInUse();
+    p->MarkDead();
+  }
 }
 
 void Mir2Lir::ResetDef(RegStorage reg) {
-  DCHECK(!reg.IsPair());  // Is this done?  If so, do on both low and high.
-  ResetDef(reg.GetReg());
+  if (reg.IsPair()) {
+    GetRegInfo(reg.GetLow())->ResetDefBody();
+    GetRegInfo(reg.GetHigh())->ResetDefBody();
+  } else {
+    GetRegInfo(reg)->ResetDefBody();
+  }
 }
 
-void Mir2Lir::NullifyRange(LIR *start, LIR *finish, int s_reg1, int s_reg2) {
-  if (start && finish) {
-    LIR *p;
-    DCHECK_EQ(s_reg1, s_reg2);
-    for (p = start; ; p = p->next) {
+void Mir2Lir::NullifyRange(RegStorage reg, int s_reg) {
+  RegisterInfo* info = nullptr;
+  RegStorage rs = reg.IsPair() ? reg.GetLow() : reg;
+  if (IsTemp(rs)) {
+    info = GetRegInfo(reg);
+  }
+  if ((info != nullptr) && (info->DefStart() != nullptr) && (info->DefEnd() != nullptr)) {
+    DCHECK_EQ(info->SReg(), s_reg);  // Make sure we're on the same page.
+    for (LIR* p = info->DefStart();; p = p->next) {
       NopLIR(p);
-      if (p == finish)
+      if (p == info->DefEnd()) {
         break;
+      }
     }
   }
 }
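Editor's note: NullifyRange() now takes the register and looks up the def boundaries itself, then nops every LIR from DefStart to DefEnd inclusive. A small sketch of that range walk over an assumed LIR list:

```cpp
#include <cassert>

struct LIR {
  bool is_nop = false;
  LIR* next = nullptr;
};

static void NopLIR(LIR* p) { p->is_nop = true; }

// Nop every instruction from def_start to def_end, inclusive.
static void NullifyRange(LIR* def_start, LIR* def_end) {
  if (def_start == nullptr || def_end == nullptr) {
    return;  // No def range recorded for this temp.
  }
  for (LIR* p = def_start;; p = p->next) {
    NopLIR(p);
    if (p == def_end) {
      break;
    }
  }
}

int main() {
  LIR a, b, c;
  a.next = &b;
  b.next = &c;
  NullifyRange(&a, &b);
  assert(a.is_nop && b.is_nop && !c.is_nop);
  return 0;
}
```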
@@ -525,9 +633,9 @@
   DCHECK(!rl.wide);
   DCHECK(start && start->next);
   DCHECK(finish);
-  RegisterInfo* p = GetRegInfo(rl.reg.GetReg());
-  p->def_start = start->next;
-  p->def_end = finish;
+  RegisterInfo* p = GetRegInfo(rl.reg);
+  p->SetDefStart(start->next);
+  p->SetDefEnd(finish);
 }
 
 /*
@@ -539,28 +647,45 @@
   DCHECK(rl.wide);
   DCHECK(start && start->next);
   DCHECK(finish);
-  RegisterInfo* p = GetRegInfo(rl.reg.GetLowReg());
-  ResetDef(rl.reg.GetHighReg());  // Only track low of pair
-  p->def_start = start->next;
-  p->def_end = finish;
+  RegisterInfo* p;
+  if (rl.reg.IsPair()) {
+    p = GetRegInfo(rl.reg.GetLow());
+    ResetDef(rl.reg.GetHigh());  // Only track low of pair
+  } else {
+    p = GetRegInfo(rl.reg);
+  }
+  p->SetDefStart(start->next);
+  p->SetDefEnd(finish);
 }
 
 RegLocation Mir2Lir::WideToNarrow(RegLocation rl) {
   DCHECK(rl.wide);
   if (rl.location == kLocPhysReg) {
-    RegisterInfo* info_lo = GetRegInfo(rl.reg.GetLowReg());
-    RegisterInfo* info_hi = GetRegInfo(rl.reg.GetHighReg());
-    if (info_lo->is_temp) {
-      info_lo->pair = false;
-      info_lo->def_start = NULL;
-      info_lo->def_end = NULL;
+    if (rl.reg.IsPair()) {
+      RegisterInfo* info_lo = GetRegInfo(rl.reg.GetLow());
+      RegisterInfo* info_hi = GetRegInfo(rl.reg.GetHigh());
+      if (info_lo->IsTemp()) {
+        info_lo->SetIsWide(false);
+        info_lo->ResetDefBody();
+      }
+      if (info_hi->IsTemp()) {
+        info_hi->SetIsWide(false);
+        info_hi->ResetDefBody();
+      }
+      rl.reg = rl.reg.GetLow();
+    } else {
+      /*
+       * TODO: If not a pair, we can't just drop the high register.  On some targets, we may be
+       * able to re-cast the 64-bit register as 32 bits, so it might be worthwhile to revisit
+       * this code.  Will probably want to make this a virtual function.
+       */
+      // Can't narrow 64-bit register.  Clobber.
+      if (GetRegInfo(rl.reg)->IsTemp()) {
+        Clobber(rl.reg);
+        FreeTemp(rl.reg);
+      }
+      rl.location = kLocDalvikFrame;
     }
-    if (info_hi->is_temp) {
-      info_hi->pair = false;
-      info_hi->def_start = NULL;
-      info_hi->def_end = NULL;
-    }
-    rl.reg = RegStorage::Solo32(rl.reg.GetLowReg());
   }
   rl.wide = false;
   return rl;
@@ -568,220 +693,294 @@
 
 void Mir2Lir::ResetDefLoc(RegLocation rl) {
   DCHECK(!rl.wide);
-  RegisterInfo* p = IsTemp(rl.reg.GetReg());
-  if (p && !(cu_->disable_opt & (1 << kSuppressLoads))) {
-    DCHECK(!p->pair);
-    NullifyRange(p->def_start, p->def_end, p->s_reg, rl.s_reg_low);
+  if (IsTemp(rl.reg) && !(cu_->disable_opt & (1 << kSuppressLoads))) {
+    NullifyRange(rl.reg, rl.s_reg_low);
   }
-  ResetDef(rl.reg.GetReg());
+  ResetDef(rl.reg);
 }
 
 void Mir2Lir::ResetDefLocWide(RegLocation rl) {
   DCHECK(rl.wide);
-  RegisterInfo* p_low = IsTemp(rl.reg.GetLowReg());
-  RegisterInfo* p_high = IsTemp(rl.reg.GetHighReg());
-  if (p_low && !(cu_->disable_opt & (1 << kSuppressLoads))) {
-    DCHECK(p_low->pair);
-    NullifyRange(p_low->def_start, p_low->def_end, p_low->s_reg, rl.s_reg_low);
+  // If pair, only track low reg of pair.
+  RegStorage rs = rl.reg.IsPair() ? rl.reg.GetLow() : rl.reg;
+  if (IsTemp(rs) && !(cu_->disable_opt & (1 << kSuppressLoads))) {
+    NullifyRange(rs, rl.s_reg_low);
   }
-  if (p_high && !(cu_->disable_opt & (1 << kSuppressLoads))) {
-    DCHECK(p_high->pair);
-  }
-  ResetDef(rl.reg.GetLowReg());
-  ResetDef(rl.reg.GetHighReg());
+  ResetDef(rs);
 }
 
 void Mir2Lir::ResetDefTracking() {
-  for (int i = 0; i< reg_pool_->num_core_regs; i++) {
-    ResetDefBody(&reg_pool_->core_regs[i]);
+  GrowableArray<RegisterInfo*>::Iterator core_it(&reg_pool_->core_regs_);
+  for (RegisterInfo* info = core_it.Next(); info != nullptr; info = core_it.Next()) {
+    info->ResetDefBody();
   }
-  for (int i = 0; i< reg_pool_->num_fp_regs; i++) {
-    ResetDefBody(&reg_pool_->FPRegs[i]);
+  GrowableArray<RegisterInfo*>::Iterator sp_it(&reg_pool_->sp_regs_);
+  for (RegisterInfo* info = sp_it.Next(); info != nullptr; info = sp_it.Next()) {
+    info->ResetDefBody();
+  }
+  GrowableArray<RegisterInfo*>::Iterator dp_it(&reg_pool_->dp_regs_);
+  for (RegisterInfo* info = dp_it.Next(); info != nullptr; info = dp_it.Next()) {
+    info->ResetDefBody();
   }
 }
 
-void Mir2Lir::ClobberAllRegs() {
+void Mir2Lir::ClobberAllTemps() {
   GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_);
   for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) {
-    info->live = false;
-    info->s_reg = INVALID_SREG;
-    info->def_start = NULL;
-    info->def_end = NULL;
-    info->pair = false;
+    ClobberBody(info);
+  }
+}
+
+void Mir2Lir::FlushRegWide(RegStorage reg) {
+  if (reg.IsPair()) {
+    RegisterInfo* info1 = GetRegInfo(reg.GetLow());
+    RegisterInfo* info2 = GetRegInfo(reg.GetHigh());
+    DCHECK(info1 && info2 && info1->IsWide() && info2->IsWide() &&
+         (info1->Partner() == info2->GetReg()) && (info2->Partner() == info1->GetReg()));
+    if ((info1->IsLive() && info1->IsDirty()) || (info2->IsLive() && info2->IsDirty())) {
+      if (!(info1->IsTemp() && info2->IsTemp())) {
+        /* Should not happen.  If it does, there's a problem in eval_loc */
+        LOG(FATAL) << "Long half-temp, half-promoted";
+      }
+
+      info1->SetIsDirty(false);
+      info2->SetIsDirty(false);
+      if (mir_graph_->SRegToVReg(info2->SReg()) < mir_graph_->SRegToVReg(info1->SReg())) {
+        info1 = info2;
+      }
+      int v_reg = mir_graph_->SRegToVReg(info1->SReg());
+      StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, k64);
+    }
+  } else {
+    RegisterInfo* info = GetRegInfo(reg);
+    if (info->IsLive() && info->IsDirty()) {
+      info->SetIsDirty(false);
+      int v_reg = mir_graph_->SRegToVReg(info->SReg());
+      StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, k64);
+    }
+  }
+}
+
+void Mir2Lir::FlushReg(RegStorage reg) {
+  DCHECK(!reg.IsPair());
+  RegisterInfo* info = GetRegInfo(reg);
+  if (info->IsLive() && info->IsDirty()) {
+    info->SetIsDirty(false);
+    int v_reg = mir_graph_->SRegToVReg(info->SReg());
+    StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, kWord);
   }
 }
 
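Editor's note: FlushReg()/FlushRegWide() only store a value back to its Dalvik-frame home when it is both live and dirty, and they clear the dirty bit so a repeated flush does not store it again. An illustrative sketch of that write-back protocol; modeling the frame as a map is an assumption made for brevity.

```cpp
#include <cassert>
#include <map>

struct TrackedReg {
  int v_reg = -1;  // Home virtual-register slot.
  int value = 0;
  bool live = false;
  bool dirty = false;
};

// Store back to the home frame slot only when live and dirty, then mark clean.
static void FlushReg(TrackedReg& r, std::map<int, int>& frame) {
  if (r.live && r.dirty) {
    r.dirty = false;
    frame[r.v_reg] = r.value;
  }
}

int main() {
  std::map<int, int> frame;
  TrackedReg r;
  r.v_reg = 3;
  r.value = 42;
  r.live = true;
  r.dirty = true;
  FlushReg(r, frame);
  FlushReg(r, frame);  // Second call is a no-op: the value is no longer dirty.
  assert(frame.at(3) == 42 && !r.dirty);
  return 0;
}
```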
 void Mir2Lir::FlushSpecificReg(RegisterInfo* info) {
-  if (info->pair) {
-    FlushRegWide(RegStorage(RegStorage::k64BitPair, info->reg, info->partner));
+  if (info->IsWide()) {
+    FlushRegWide(info->GetReg());
   } else {
-    FlushReg(RegStorage::Solo32(info->reg));
-  }
-}
-
-// Make sure nothing is live and dirty
-void Mir2Lir::FlushAllRegsBody(RegisterInfo* info, int num_regs) {
-  for (int i = 0; i < num_regs; i++) {
-    if (info[i].live && info[i].dirty) {
-      FlushSpecificReg(&info[i]);
-    }
+    FlushReg(info->GetReg());
   }
 }
 
 void Mir2Lir::FlushAllRegs() {
-  FlushAllRegsBody(reg_pool_->core_regs,
-           reg_pool_->num_core_regs);
-  FlushAllRegsBody(reg_pool_->FPRegs,
-           reg_pool_->num_fp_regs);
-  ClobberAllRegs();
+  GrowableArray<RegisterInfo*>::Iterator it(&tempreg_info_);
+  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
+    if (info->IsDirty() && info->IsLive()) {
+      FlushSpecificReg(info);
+    }
+    info->MarkDead();
+    info->SetIsWide(false);
+  }
 }
 
 
-// TUNING: rewrite all of this reg stuff.  Probably use an attribute table
 bool Mir2Lir::RegClassMatches(int reg_class, RegStorage reg) {
-  int reg_num = reg.IsPair() ? reg.GetLowReg() : reg.GetReg();
   if (reg_class == kAnyReg) {
     return true;
   } else if (reg_class == kCoreReg) {
-    return !IsFpReg(reg_num);
+    return !reg.IsFloat();
   } else {
-    return IsFpReg(reg_num);
+    return reg.IsFloat();
   }
 }
 
-void Mir2Lir::MarkLive(RegStorage reg, int s_reg) {
-  DCHECK(!reg.IsPair());   // Could be done - but would that be meaningful?
-  RegisterInfo* info = GetRegInfo(reg.GetReg());
-  if ((info->s_reg == s_reg) && info->live) {
-    return;  /* already live */
-  } else if (s_reg != INVALID_SREG) {
-    ClobberSReg(s_reg);
-    if (info->is_temp) {
-      info->live = true;
+void Mir2Lir::MarkLive(RegLocation loc) {
+  RegStorage reg = loc.reg;
+  if (!IsTemp(reg)) {
+    return;
+  }
+  int s_reg = loc.s_reg_low;
+  if (s_reg == INVALID_SREG) {
+    // Can't be live if no associated sreg.
+    if (reg.IsPair()) {
+      GetRegInfo(reg.GetLow())->MarkDead();
+      GetRegInfo(reg.GetHigh())->MarkDead();
+    } else {
+      GetRegInfo(reg)->MarkDead();
     }
   } else {
-    /* Can't be live if no associated s_reg */
-    DCHECK(info->is_temp);
-    info->live = false;
+    if (reg.IsPair()) {
+      RegisterInfo* info_lo = GetRegInfo(reg.GetLow());
+      RegisterInfo* info_hi = GetRegInfo(reg.GetHigh());
+      if (info_lo->IsLive() && (info_lo->SReg() == s_reg) && info_hi->IsLive() &&
+          (info_hi->SReg() == s_reg)) {
+        return;  // Already live.
+      }
+      ClobberSReg(s_reg);
+      ClobberSReg(s_reg + 1);
+      info_lo->MarkLive(s_reg);
+      info_hi->MarkLive(s_reg + 1);
+    } else {
+      RegisterInfo* info = GetRegInfo(reg);
+      if (info->IsLive() && (info->SReg() == s_reg)) {
+        return;  // Already live.
+      }
+      ClobberSReg(s_reg);
+      if (loc.wide) {
+        ClobberSReg(s_reg + 1);
+      }
+      info->MarkLive(s_reg);
+    }
+    if (loc.wide) {
+      MarkWide(reg);
+    } else {
+      MarkNarrow(reg);
+    }
   }
-  info->s_reg = s_reg;
-}
-
-void Mir2Lir::MarkTemp(int reg) {
-  RegisterInfo* info = GetRegInfo(reg);
-  tempreg_info_.Insert(info);
-  info->is_temp = true;
 }
 
 void Mir2Lir::MarkTemp(RegStorage reg) {
   DCHECK(!reg.IsPair());
-  MarkTemp(reg.GetReg());
-}
-
-void Mir2Lir::UnmarkTemp(int reg) {
   RegisterInfo* info = GetRegInfo(reg);
-  tempreg_info_.Delete(info);
-  info->is_temp = false;
+  tempreg_info_.Insert(info);
+  info->SetIsTemp(true);
 }
 
 void Mir2Lir::UnmarkTemp(RegStorage reg) {
   DCHECK(!reg.IsPair());
-  UnmarkTemp(reg.GetReg());
+  RegisterInfo* info = GetRegInfo(reg);
+  tempreg_info_.Delete(info);
+  info->SetIsTemp(false);
 }
 
-void Mir2Lir::MarkPair(int low_reg, int high_reg) {
-  DCHECK_NE(low_reg, high_reg);
-  RegisterInfo* info_lo = GetRegInfo(low_reg);
-  RegisterInfo* info_hi = GetRegInfo(high_reg);
-  info_lo->pair = info_hi->pair = true;
-  info_lo->partner = high_reg;
-  info_hi->partner = low_reg;
-}
-
-void Mir2Lir::MarkClean(RegLocation loc) {
-  if (loc.wide) {
-    RegisterInfo* info = GetRegInfo(loc.reg.GetLowReg());
-    info->dirty = false;
-    info = GetRegInfo(loc.reg.GetHighReg());
-    info->dirty = false;
+void Mir2Lir::MarkWide(RegStorage reg) {
+  if (reg.IsPair()) {
+    RegisterInfo* info_lo = GetRegInfo(reg.GetLow());
+    RegisterInfo* info_hi = GetRegInfo(reg.GetHigh());
+    // Unpair any old partners.
+    if (info_lo->IsWide() && info_lo->Partner() != info_hi->GetReg()) {
+      GetRegInfo(info_lo->Partner())->SetIsWide(false);
+    }
+    if (info_hi->IsWide() && info_hi->Partner() != info_lo->GetReg()) {
+      GetRegInfo(info_hi->Partner())->SetIsWide(false);
+    }
+    info_lo->SetIsWide(true);
+    info_hi->SetIsWide(true);
+    info_lo->SetPartner(reg.GetHigh());
+    info_hi->SetPartner(reg.GetLow());
   } else {
-    RegisterInfo* info = GetRegInfo(loc.reg.GetReg());
-    info->dirty = false;
+    RegisterInfo* info = GetRegInfo(reg);
+    info->SetIsWide(true);
+    info->SetPartner(reg);
   }
 }
 
+void Mir2Lir::MarkNarrow(RegStorage reg) {
+  DCHECK(!reg.IsPair());
+  RegisterInfo* info = GetRegInfo(reg);
+  info->SetIsWide(false);
+  info->SetPartner(reg);
+}
+
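Editor's note: MarkWide() and MarkNarrow() keep the partner links consistent: each half of a pair points at the other, a narrow or 64-bit solo register partners itself, and any stale partner is un-widened. A simplified sketch of that bookkeeping, with invented types:

```cpp
#include <cassert>

struct Info {
  int reg;
  bool wide = false;
  int partner;  // Register number of the other half, or itself when narrow/solo.
};

static void MarkWidePair(Info& lo, Info& hi) {
  lo.wide = hi.wide = true;
  lo.partner = hi.reg;  // Each half records the other as its partner.
  hi.partner = lo.reg;
}

static void MarkNarrow(Info& r) {
  r.wide = false;
  r.partner = r.reg;  // A narrow register partners itself.
}

int main() {
  Info r0{0, false, 0};
  Info r1{1, false, 1};
  MarkWidePair(r0, r1);
  assert(r0.wide && r0.partner == 1 && r1.partner == 0);
  MarkNarrow(r0);
  assert(!r0.wide && r0.partner == 0);
  return 0;
}
```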
+void Mir2Lir::MarkClean(RegLocation loc) {
+  if (loc.reg.IsPair()) {
+    RegisterInfo* info = GetRegInfo(loc.reg.GetLow());
+    info->SetIsDirty(false);
+    info = GetRegInfo(loc.reg.GetHigh());
+    info->SetIsDirty(false);
+  } else {
+    RegisterInfo* info = GetRegInfo(loc.reg);
+    info->SetIsDirty(false);
+  }
+}
+
+// FIXME: need to verify rules/assumptions about how wide values are treated in 64BitSolos.
 void Mir2Lir::MarkDirty(RegLocation loc) {
   if (loc.home) {
     // If already home, can't be dirty
     return;
   }
-  if (loc.wide) {
-    RegisterInfo* info = GetRegInfo(loc.reg.GetLowReg());
-    info->dirty = true;
-    info = GetRegInfo(loc.reg.GetHighReg());
-    info->dirty = true;
+  if (loc.reg.IsPair()) {
+    RegisterInfo* info = GetRegInfo(loc.reg.GetLow());
+    info->SetIsDirty(true);
+    info = GetRegInfo(loc.reg.GetHigh());
+    info->SetIsDirty(true);
   } else {
-    RegisterInfo* info = GetRegInfo(loc.reg.GetReg());
-    info->dirty = true;
+    RegisterInfo* info = GetRegInfo(loc.reg);
+    info->SetIsDirty(true);
   }
 }
 
-void Mir2Lir::MarkInUse(int reg) {
-    RegisterInfo* info = GetRegInfo(reg);
-    info->in_use = true;
-}
-
 void Mir2Lir::MarkInUse(RegStorage reg) {
   if (reg.IsPair()) {
-    MarkInUse(reg.GetLowReg());
-    MarkInUse(reg.GetHighReg());
+    GetRegInfo(reg.GetLow())->MarkInUse();
+    GetRegInfo(reg.GetHigh())->MarkInUse();
   } else {
-    MarkInUse(reg.GetReg());
+    GetRegInfo(reg)->MarkInUse();
   }
 }
 
-void Mir2Lir::CopyRegInfo(int new_reg, int old_reg) {
-  RegisterInfo* new_info = GetRegInfo(new_reg);
-  RegisterInfo* old_info = GetRegInfo(old_reg);
-  // Target temp, live, dirty status must not change
-  bool is_temp = new_info->is_temp;
-  bool live = new_info->live;
-  bool dirty = new_info->dirty;
-  *new_info = *old_info;
-  // Restore target's temp, live, dirty status
-  new_info->is_temp = is_temp;
-  new_info->live = live;
-  new_info->dirty = dirty;
-  new_info->reg = new_reg;
-}
-
-void Mir2Lir::CopyRegInfo(RegStorage new_reg, RegStorage old_reg) {
-  DCHECK(!new_reg.IsPair());
-  DCHECK(!old_reg.IsPair());
-  CopyRegInfo(new_reg.GetReg(), old_reg.GetReg());
-}
-
 bool Mir2Lir::CheckCorePoolSanity() {
-  for (static int i = 0; i < reg_pool_->num_core_regs; i++) {
-    if (reg_pool_->core_regs[i].pair) {
-      static int my_reg = reg_pool_->core_regs[i].reg;
-      static int my_sreg = reg_pool_->core_regs[i].s_reg;
-      static int partner_reg = reg_pool_->core_regs[i].partner;
-      static RegisterInfo* partner = GetRegInfo(partner_reg);
+  GrowableArray<RegisterInfo*>::Iterator it(&tempreg_info_);
+  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
+    if (info->IsTemp() && info->IsLive() && info->IsWide()) {
+      RegStorage my_reg = info->GetReg();
+      int my_sreg = info->SReg();
+      RegStorage partner_reg = info->Partner();
+      RegisterInfo* partner = GetRegInfo(partner_reg);
       DCHECK(partner != NULL);
-      DCHECK(partner->pair);
-      DCHECK_EQ(my_reg, partner->partner);
-      static int partner_sreg = partner->s_reg;
+      DCHECK(partner->IsWide());
+      DCHECK_EQ(my_reg.GetReg(), partner->Partner().GetReg());
+      DCHECK(partner->IsLive());
+      int partner_sreg = partner->SReg();
       if (my_sreg == INVALID_SREG) {
         DCHECK_EQ(partner_sreg, INVALID_SREG);
       } else {
         int diff = my_sreg - partner_sreg;
-        DCHECK((diff == -1) || (diff == 1));
+        DCHECK((diff == 0) || (diff == -1) || (diff == 1));
       }
     }
-    if (!reg_pool_->core_regs[i].live) {
-      DCHECK(reg_pool_->core_regs[i].def_start == NULL);
-      DCHECK(reg_pool_->core_regs[i].def_end == NULL);
+    if (info->Master() != info) {
+      // Aliased.
+      if (info->IsLive() && (info->SReg() != INVALID_SREG)) {
+        // If I'm live, master should not be live, but should show liveness in alias set.
+        DCHECK_EQ(info->Master()->SReg(), INVALID_SREG);
+        DCHECK(!info->Master()->IsDead());
+      } else if (!info->IsDead()) {
+        // If I'm not live but there is liveness in the set, the master must be live.
+        DCHECK_EQ(info->SReg(), INVALID_SREG);
+        DCHECK(info->Master()->IsLive());
+      }
+    }
+    if (info->IsAliased()) {
+      // Has child aliases.
+      DCHECK_EQ(info->Master(), info);
+      if (info->IsLive() && (info->SReg() != INVALID_SREG)) {
+        // Master live, no child should be dead - all should show liveness in set.
+        for (RegisterInfo* p = info->GetAliasChain(); p != nullptr; p = p->GetAliasChain()) {
+          DCHECK(!p->IsDead());
+          DCHECK_EQ(p->SReg(), INVALID_SREG);
+        }
+      } else if (!info->IsDead()) {
+        // Master not live, one or more aliases must be.
+        bool live_alias = false;
+        for (RegisterInfo* p = info->GetAliasChain(); p != nullptr; p = p->GetAliasChain()) {
+          live_alias |= p->IsLive();
+        }
+        DCHECK(live_alias);
+      }
+    }
+    if (info->IsLive() && (info->SReg() == INVALID_SREG)) {
+      // A live reg with INVALID_SREG is only partially live; its defs should be null.
+      DCHECK(info->DefStart() == nullptr);
+      DCHECK(info->DefEnd() == nullptr);
     }
   }
   return true;
@@ -796,80 +995,64 @@
  * is a bit complex when dealing with FP regs.  Examine code to see
  * if it's worthwhile trying to be more clever here.
  */
-
 RegLocation Mir2Lir::UpdateLoc(RegLocation loc) {
   DCHECK(!loc.wide);
   DCHECK(CheckCorePoolSanity());
   if (loc.location != kLocPhysReg) {
     DCHECK((loc.location == kLocDalvikFrame) ||
          (loc.location == kLocCompilerTemp));
-    RegisterInfo* info_lo = AllocLive(loc.s_reg_low, kAnyReg);
-    if (info_lo) {
-      if (info_lo->pair) {
-        Clobber(info_lo->reg);
-        Clobber(info_lo->partner);
-        FreeTemp(info_lo->reg);
-      } else {
-        loc.reg = RegStorage::Solo32(info_lo->reg);
+    RegStorage reg = AllocLiveReg(loc.s_reg_low, kAnyReg, false);
+    if (reg.Valid()) {
+      bool match = true;
+      RegisterInfo* info = GetRegInfo(reg);
+      match &= !reg.IsPair();
+      match &= !info->IsWide();
+      if (match) {
         loc.location = kLocPhysReg;
+        loc.reg = reg;
+      } else {
+        Clobber(reg);
+        FreeTemp(reg);
       }
     }
   }
   return loc;
 }
 
-/* see comments for update_loc */
 RegLocation Mir2Lir::UpdateLocWide(RegLocation loc) {
   DCHECK(loc.wide);
   DCHECK(CheckCorePoolSanity());
   if (loc.location != kLocPhysReg) {
     DCHECK((loc.location == kLocDalvikFrame) ||
          (loc.location == kLocCompilerTemp));
-    // Are the dalvik regs already live in physical registers?
-    RegisterInfo* info_lo = AllocLive(loc.s_reg_low, kAnyReg);
-    RegisterInfo* info_hi = AllocLive(GetSRegHi(loc.s_reg_low), kAnyReg);
-    bool match = true;
-    match = match && (info_lo != NULL);
-    match = match && (info_hi != NULL);
-    // Are they both core or both FP?
-    match = match && (IsFpReg(info_lo->reg) == IsFpReg(info_hi->reg));
-    // If a pair of floating point singles, are they properly aligned?
-    if (match && IsFpReg(info_lo->reg)) {
-      match &= ((info_lo->reg & 0x1) == 0);
-      match &= ((info_hi->reg - info_lo->reg) == 1);
-    }
-    // If previously used as a pair, it is the same pair?
-    if (match && (info_lo->pair || info_hi->pair)) {
-      match = (info_lo->pair == info_hi->pair);
-      match &= ((info_lo->reg == info_hi->partner) &&
-            (info_hi->reg == info_lo->partner));
-    }
-    if (match) {
-      // Can reuse - update the register usage info
-      loc.location = kLocPhysReg;
-      loc.reg = RegStorage(RegStorage::k64BitPair, info_lo->reg, info_hi->reg);
-      MarkPair(loc.reg.GetLowReg(), loc.reg.GetHighReg());
-      DCHECK(!IsFpReg(loc.reg.GetLowReg()) || ((loc.reg.GetLowReg() & 0x1) == 0));
-      return loc;
-    }
-    // Can't easily reuse - clobber and free any overlaps
-    if (info_lo) {
-      Clobber(info_lo->reg);
-      FreeTemp(info_lo->reg);
-      if (info_lo->pair)
-        Clobber(info_lo->partner);
-    }
-    if (info_hi) {
-      Clobber(info_hi->reg);
-      FreeTemp(info_hi->reg);
-      if (info_hi->pair)
-        Clobber(info_hi->partner);
+    RegStorage reg = AllocLiveReg(loc.s_reg_low, kAnyReg, true);
+    if (reg.Valid()) {
+      bool match = true;
+      if (reg.IsPair()) {
+        // If we've got a register pair, make sure that it was last used as the same pair.
+        RegisterInfo* info_lo = GetRegInfo(reg.GetLow());
+        RegisterInfo* info_hi = GetRegInfo(reg.GetHigh());
+        match &= info_lo->IsWide();
+        match &= info_hi->IsWide();
+        match &= (info_lo->Partner() == info_hi->GetReg());
+        match &= (info_hi->Partner() == info_lo->GetReg());
+      } else {
+        RegisterInfo* info = GetRegInfo(reg);
+        match &= info->IsWide();
+        match &= (info->GetReg() == info->Partner());
+      }
+      if (match) {
+        loc.location = kLocPhysReg;
+        loc.reg = reg;
+      } else {
+        Clobber(reg);
+        FreeTemp(reg);
+      }
     }
   }
   return loc;
 }
 
-
 /* For use in cases we don't know (or care) width */
 RegLocation Mir2Lir::UpdateRawLoc(RegLocation loc) {
   if (loc.wide)
@@ -885,18 +1068,15 @@
 
   /* If already in registers, we can assume proper form.  Right reg class? */
   if (loc.location == kLocPhysReg) {
-    DCHECK_EQ(IsFpReg(loc.reg.GetLowReg()), IsFpReg(loc.reg.GetHighReg()));
-    DCHECK(!IsFpReg(loc.reg.GetLowReg()) || ((loc.reg.GetLowReg() & 0x1) == 0));
     if (!RegClassMatches(reg_class, loc.reg)) {
-      /* Wrong register class.  Reallocate and copy */
+      // Wrong register class.  Reallocate and transfer ownership.
       RegStorage new_regs = AllocTypedTempWide(loc.fp, reg_class);
-      OpRegCopyWide(new_regs, loc.reg);
-      CopyRegInfo(new_regs.GetLowReg(), loc.reg.GetLowReg());
-      CopyRegInfo(new_regs.GetHighReg(), loc.reg.GetHighReg());
+      // Clobber the old regs.
       Clobber(loc.reg);
+      // ...and mark the new ones live.
       loc.reg = new_regs;
-      MarkPair(loc.reg.GetLowReg(), loc.reg.GetHighReg());
-      DCHECK(!IsFpReg(loc.reg.GetLowReg()) || ((loc.reg.GetLowReg() & 0x1) == 0));
+      MarkWide(loc.reg);
+      MarkLive(loc);
     }
     return loc;
   }
@@ -905,34 +1085,31 @@
   DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG);
 
   loc.reg = AllocTypedTempWide(loc.fp, reg_class);
+  MarkWide(loc.reg);
 
-  MarkPair(loc.reg.GetLowReg(), loc.reg.GetHighReg());
   if (update) {
     loc.location = kLocPhysReg;
-    MarkLive(loc.reg.GetLow(), loc.s_reg_low);
-    // Does this wide value live in two registers or one vector register?
-    if (loc.reg.GetLowReg() != loc.reg.GetHighReg()) {
-      MarkLive(loc.reg.GetHigh(), GetSRegHi(loc.s_reg_low));
-    }
+    MarkLive(loc);
   }
-  DCHECK(!IsFpReg(loc.reg.GetLowReg()) || ((loc.reg.GetLowReg() & 0x1) == 0));
   return loc;
 }
 
 RegLocation Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool update) {
-  if (loc.wide)
+  if (loc.wide) {
     return EvalLocWide(loc, reg_class, update);
+  }
 
   loc = UpdateLoc(loc);
 
   if (loc.location == kLocPhysReg) {
     if (!RegClassMatches(reg_class, loc.reg)) {
-      /* Wrong register class.  Realloc, copy and transfer ownership */
+      // Wrong register class.  Reallocate and transfer ownership.
       RegStorage new_reg = AllocTypedTemp(loc.fp, reg_class);
-      OpRegCopy(new_reg, loc.reg);
-      CopyRegInfo(new_reg, loc.reg);
+      // Clobber the old reg.
       Clobber(loc.reg);
+      // ...and mark the new one live.
       loc.reg = new_reg;
+      MarkLive(loc);
     }
     return loc;
   }
@@ -943,7 +1120,7 @@
 
   if (update) {
     loc.location = kLocPhysReg;
-    MarkLive(loc.reg, loc.s_reg_low);
+    MarkLive(loc);
   }
   return loc;
 }
@@ -1115,9 +1292,14 @@
           int low_reg = promotion_map_[p_map_idx].FpReg;
           int high_reg = promotion_map_[p_map_idx+1].FpReg;
           // Doubles require pair of singles starting at even reg
+          // TODO: move target-specific restrictions out of here.
           if (((low_reg & 0x1) == 0) && ((low_reg + 1) == high_reg)) {
             curr->location = kLocPhysReg;
-            curr->reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg);
+            if (cu_->instruction_set == kThumb2) {
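+              // Two consecutive singles s(2n), s(2n+1) overlay double d(n), hence the shift by one.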
+              curr->reg = RegStorage::FloatSolo64(RegStorage::RegNum(low_reg) >> 1);
+            } else {
+              curr->reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg);
+            }
             curr->home = true;
           }
         }
@@ -1155,14 +1337,9 @@
   RegLocation gpr_res = LocCReturnWide();
   RegLocation fpr_res = LocCReturnDouble();
   RegLocation res = is_double ? fpr_res : gpr_res;
-  Clobber(res.reg.GetLowReg());
-  Clobber(res.reg.GetHighReg());
-  LockTemp(res.reg.GetLowReg());
-  LockTemp(res.reg.GetHighReg());
-  // Does this wide value live in two registers or one vector register?
-  if (res.reg.GetLowReg() != res.reg.GetHighReg()) {
-    MarkPair(res.reg.GetLowReg(), res.reg.GetHighReg());
-  }
+  Clobber(res.reg);
+  LockTemp(res.reg);
+  MarkWide(res.reg);
   return res;
 }
 
@@ -1170,11 +1347,11 @@
   RegLocation gpr_res = LocCReturn();
   RegLocation fpr_res = LocCReturnFloat();
   RegLocation res = is_float ? fpr_res : gpr_res;
-  Clobber(res.reg.GetReg());
+  Clobber(res.reg);
   if (cu_->instruction_set == kMips) {
-    MarkInUse(res.reg.GetReg());
+    MarkInUse(res.reg);
   } else {
-    LockTemp(res.reg.GetReg());
+    LockTemp(res.reg);
   }
   return res;
 }
@@ -1204,14 +1381,9 @@
   return (lowSreg == INVALID_SREG) ? INVALID_SREG : lowSreg + 1;
 }
 
-bool Mir2Lir::oat_live_out(int s_reg) {
+bool Mir2Lir::LiveOut(int s_reg) {
   // For now.
   return true;
 }
 
-int Mir2Lir::oatSSASrc(MIR* mir, int num) {
-  DCHECK_GT(mir->ssa_rep->num_uses, num);
-  return mir->ssa_rep->uses[num];
-}
-
 }  // namespace art
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index e7a1a69..9200106 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -25,7 +25,7 @@
 const X86EncodingMap X86Mir2Lir::EncodingMap[kX86Last] = {
   { kX8632BitData, kData,    IS_UNARY_OP,            { 0, 0, 0x00, 0, 0, 0, 0, 4 }, "data",  "0x!0d" },
   { kX86Bkpt,      kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xCC, 0, 0, 0, 0, 0 }, "int 3", "" },
-  { kX86Nop,       kNop,     IS_UNARY_OP,            { 0, 0, 0x90, 0, 0, 0, 0, 0 }, "nop",   "" },
+  { kX86Nop,       kNop,     NO_OPERAND,             { 0, 0, 0x90, 0, 0, 0, 0, 0 }, "nop",   "" },
 
 #define ENCODING_MAP(opname, mem_use, reg_def, uses_ccodes, \
                      rm8_r8, rm32_r32, \
@@ -63,17 +63,24 @@
 { kX86 ## opname ## 16TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "16TI8", "fs:[!0d],!1d" }, \
   \
 { kX86 ## opname ## 32MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "32MR", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 64MR,  kMemReg64,  mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { REX_W,         0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "64MR", "[!0r+!1d],!2r" }, \
 { kX86 ## opname ## 32AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0,             0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "32AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
+{ kX86 ## opname ## 64AR,  kArrayReg64, mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { REX_W,         0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "64AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
 { kX86 ## opname ## 32TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "32TR", "fs:[!0d],!1r" }, \
 { kX86 ## opname ## 32RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RR", "!0r,!1r" }, \
 { kX86 ## opname ## 32RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## 64RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,         0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "64RM", "!0r,[!1r+!2d]" }, \
 { kX86 ## opname ## 32RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
+{ kX86 ## opname ## 64RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { REX_W,         0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
 { kX86 ## opname ## 32RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RT", "!0r,fs:[!1d]" }, \
+{ kX86 ## opname ## 64RT,  kReg64Thread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, r32_rm32, 0, 0, 0,              0,        0 }, #opname "64RT", "!0r,fs:[!1d]" }, \
 { kX86 ## opname ## 32RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "32RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 64RI,  kReg64Imm,            IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,         0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "64RI", "!0r,!1d" }, \
 { kX86 ## opname ## 32MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32MI", "[!0r+!1d],!2d" }, \
 { kX86 ## opname ## 32AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
 { kX86 ## opname ## 32TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32TI", "fs:[!0d],!1d" }, \
 { kX86 ## opname ## 32RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32RI8", "!0r,!1d" }, \
+{ kX86 ## opname ## 64RI8, kReg64Imm,            IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,         0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "64RI8", "!0r,!1d" }, \
 { kX86 ## opname ## 32MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32MI8", "[!0r+!1d],!2d" }, \
 { kX86 ## opname ## 32AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
 { kX86 ## opname ## 32TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32TI8", "fs:[!0d],!1d" }
@@ -164,23 +171,31 @@
   { kX86Mov16TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0x66, 0xC7, 0, 0, 0, 0, 2 }, "Mov16TI", "fs:[!0d],!1d" },
 
   { kX86Mov32MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0,             0, 0x89, 0, 0, 0, 0, 0 }, "Mov32MR", "[!0r+!1d],!2r" },
+  { kX86Mov64MR, kMemReg64,  IS_STORE | IS_TERTIARY_OP | REG_USE02,      { REX_W,         0, 0x89, 0, 0, 0, 0, 0 }, "Mov64MR", "[!0r+!1d],!2r" },
   { kX86Mov32AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0,             0, 0x89, 0, 0, 0, 0, 0 }, "Mov32AR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86Mov64AR, kArrayReg64, IS_STORE | IS_QUIN_OP     | REG_USE014,     { REX_W,        0, 0x89, 0, 0, 0, 0, 0 }, "Mov64AR", "[!0r+!1r<<!2d+!3d],!4r" },
   { kX86Mov32TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32TR", "fs:[!0d],!1r" },
   { kX86Mov32RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RR", "!0r,!1r" },
   { kX86Mov32RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RM", "!0r,[!1r+!2d]" },
+  { kX86Mov64RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { REX_W,         0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RM", "!0r,[!1r+!2d]" },
   { kX86Mov32RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Mov64RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { REX_W,         0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RA", "!0r,[!1r+!2r<<!3d+!4d]" },
   { kX86Mov32RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RT", "!0r,fs:[!1d]" },
+  { kX86Mov64RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, REX_W, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RT", "!0r,fs:[!1d]" },
   { kX86Mov32RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0,             0, 0xB8, 0, 0, 0, 0, 4 }, "Mov32RI", "!0r,!1d" },
   { kX86Mov32MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0,             0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32MI", "[!0r+!1d],!2d" },
   { kX86Mov32AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0,             0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" },
   { kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32TI", "fs:[!0d],!1d" },
+  { kX86Mov64TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, REX_W, 0xC7, 0, 0, 0, 0, 4 }, "Mov64TI", "fs:[!0d],!1d" },
 
-  { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE12, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RM", "!0r,[!1r+!2d]" },
+  { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1,      { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RM", "!0r,[!1r+!2d]" },
 
   { kX86Lea32RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
 
   { kX86Cmov32RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, {0, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RR", "!2c !0r,!1r" },
 
+  { kX86Cmov32RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, {0, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RM", "!3c !0r,[!1r+!2d]" },
+
 #define SHIFT_ENCODING_MAP(opname, modrm_opcode) \
 { kX86 ## opname ## 8RI, kShiftRegImm,                        IS_BINARY_OP   | REG_DEF0_USE0 |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8RI", "!0r,!1d" }, \
 { kX86 ## opname ## 8MI, kShiftMemImm,   IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0      |            SETS_CCODES, { 0,    0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8MI", "[!0r+!1d],!2d" }, \
@@ -213,8 +228,10 @@
 #undef SHIFT_ENCODING_MAP
 
   { kX86Cmc, kNullary, NO_OPERAND, { 0, 0, 0xF5, 0, 0, 0, 0, 0}, "Cmc", "" },
-  { kX86Shld32RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32", "!0r,!1r,!2d" },
-  { kX86Shrd32RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32", "!0r,!1r,!2d" },
+  { kX86Shld32RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32RRI", "!0r,!1r,!2d" },
+  { kX86Shld32MRI,  kMemRegImm,    IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32MRI", "[!0r+!1d],!2r,!3d" },
+  { kX86Shrd32RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32RRI", "!0r,!1r,!2d" },
+  { kX86Shrd32MRI,  kMemRegImm,    IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32MRI", "[!0r+!1d],!2r,!3d" },
 
   { kX86Test8RI,  kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1}, "Test8RI", "!0r,!1d" },
   { kX86Test8MI,  kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1}, "Test8MI", "[!0r+!1d],!2d" },
@@ -233,15 +250,15 @@
                            arr, arr_kind, arr_flags, imm, \
                            b_flags, hw_flags, w_flags, \
                            b_format, hw_format, w_format) \
-{ kX86 ## opname ## 8 ## reg,  reg_kind,                      reg_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #reg, #b_format "!0r" }, \
-{ kX86 ## opname ## 8 ## mem,  mem_kind, IS_LOAD | is_store | mem_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #mem, #b_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 8 ## arr,  arr_kind, IS_LOAD | is_store | arr_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #arr, #b_format "[!0r+!1r<<!2d+!3d]" }, \
-{ kX86 ## opname ## 16 ## reg, reg_kind,                      reg_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #reg, #hw_format "!0r" }, \
-{ kX86 ## opname ## 16 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #mem, #hw_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 16 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #arr, #hw_format "[!0r+!1r<<!2d+!3d]" }, \
-{ kX86 ## opname ## 32 ## reg, reg_kind,                      reg_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #reg, #w_format "!0r" }, \
-{ kX86 ## opname ## 32 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #mem, #w_format "[!0r+!1d]" }, \
-{ kX86 ## opname ## 32 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #arr, #w_format "[!0r+!1r<<!2d+!3d]" }
+{ kX86 ## opname ## 8 ## reg,  reg_kind,                      reg_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #reg, b_format "!0r" }, \
+{ kX86 ## opname ## 8 ## mem,  mem_kind, IS_LOAD | is_store | mem_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #mem, b_format "[!0r+!1d]" }, \
+{ kX86 ## opname ## 8 ## arr,  arr_kind, IS_LOAD | is_store | arr_flags | b_flags  | sets_ccodes, { 0,    0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #arr, b_format "[!0r+!1r<<!2d+!3d]" }, \
+{ kX86 ## opname ## 16 ## reg, reg_kind,                      reg_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #reg, hw_format "!0r" }, \
+{ kX86 ## opname ## 16 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #mem, hw_format "[!0r+!1d]" }, \
+{ kX86 ## opname ## 16 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #arr, hw_format "[!0r+!1r<<!2d+!3d]" }, \
+{ kX86 ## opname ## 32 ## reg, reg_kind,                      reg_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #reg, w_format "!0r" }, \
+{ kX86 ## opname ## 32 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #mem, w_format "[!0r+!1d]" }, \
+{ kX86 ## opname ## 32 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags  | sets_ccodes, { 0,    0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #arr, w_format "[!0r+!1r<<!2d+!3d]" }
 
   UNARY_ENCODING_MAP(Not, 0x2, IS_STORE, 0,           R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""),
   UNARY_ENCODING_MAP(Neg, 0x3, IS_STORE, SETS_CCODES, R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""),
@@ -258,9 +275,9 @@
   { kX86Pop32R,   kRegOpcode, IS_UNARY_OP | REG_DEF0 | REG_USE_SP | REG_DEF_SP | IS_LOAD,  { 0, 0, 0x58, 0,    0, 0, 0, 0 }, "Pop32R",   "!0r" },
 
 #define EXT_0F_ENCODING_MAP(opname, prefix, opcode, reg_def) \
-{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE01,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \
-{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
+{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \
+{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
 
   EXT_0F_ENCODING_MAP(Movsd, 0xF2, 0x10, REG_DEF0),
   { kX86MovsdMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovsdMR", "[!0r+!1d],!2r" },
@@ -276,23 +293,23 @@
   EXT_0F_ENCODING_MAP(Cvttss2si, 0xF3, 0x2C, REG_DEF0),
   EXT_0F_ENCODING_MAP(Cvtsd2si,  0xF2, 0x2D, REG_DEF0),
   EXT_0F_ENCODING_MAP(Cvtss2si,  0xF3, 0x2D, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Ucomisd,   0x66, 0x2E, SETS_CCODES),
-  EXT_0F_ENCODING_MAP(Ucomiss,   0x00, 0x2E, SETS_CCODES),
-  EXT_0F_ENCODING_MAP(Comisd,    0x66, 0x2F, SETS_CCODES),
-  EXT_0F_ENCODING_MAP(Comiss,    0x00, 0x2F, SETS_CCODES),
-  EXT_0F_ENCODING_MAP(Orps,      0x00, 0x56, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Xorps,     0x00, 0x57, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Addsd,     0xF2, 0x58, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Addss,     0xF3, 0x58, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Mulsd,     0xF2, 0x59, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Mulss,     0xF3, 0x59, REG_DEF0),
+  EXT_0F_ENCODING_MAP(Ucomisd,   0x66, 0x2E, SETS_CCODES|REG_USE0),
+  EXT_0F_ENCODING_MAP(Ucomiss,   0x00, 0x2E, SETS_CCODES|REG_USE0),
+  EXT_0F_ENCODING_MAP(Comisd,    0x66, 0x2F, SETS_CCODES|REG_USE0),
+  EXT_0F_ENCODING_MAP(Comiss,    0x00, 0x2F, SETS_CCODES|REG_USE0),
+  EXT_0F_ENCODING_MAP(Orps,      0x00, 0x56, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Xorps,     0x00, 0x57, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Addsd,     0xF2, 0x58, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Addss,     0xF3, 0x58, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Mulsd,     0xF2, 0x59, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Mulss,     0xF3, 0x59, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Cvtsd2ss,  0xF2, 0x5A, REG_DEF0),
   EXT_0F_ENCODING_MAP(Cvtss2sd,  0xF3, 0x5A, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Subsd,     0xF2, 0x5C, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Subss,     0xF3, 0x5C, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Divsd,     0xF2, 0x5E, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Divss,     0xF3, 0x5E, REG_DEF0),
-  EXT_0F_ENCODING_MAP(Punpckldq, 0x66, 0x62, REG_DEF0),
+  EXT_0F_ENCODING_MAP(Subsd,     0xF2, 0x5C, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Subss,     0xF3, 0x5C, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Divsd,     0xF2, 0x5E, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Divss,     0xF3, 0x5E, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Punpckldq, 0x66, 0x62, REG_DEF0_USE0),
 
   { kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1 }, "PsrlqRI", "!0r,!1d" },
   { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1 }, "PsllqRI", "!0r,!1d" },
@@ -303,6 +320,11 @@
   { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0 }, "FstpsM", "[!0r,!1d]" },
   { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0 }, "FstpdM", "[!0r,!1d]" },
 
+  EXT_0F_ENCODING_MAP(Mova128,    0x66, 0x6F, REG_DEF0),
+  { kX86Mova128MR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0 }, "Mova128MR", "[!0r+!1d],!2r" },
+  { kX86Mova128AR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0 }, "Mova128AR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+
   EXT_0F_ENCODING_MAP(Movups,    0x0, 0x10, REG_DEF0),
   { kX86MovupsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsMR", "[!0r+!1d],!2r" },
   { kX86MovupsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsAR", "[!0r+!1r<<!2d+!3d],!4r" },
@@ -322,7 +344,7 @@
   { kX86MovhpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
   EXT_0F_ENCODING_MAP(Movdxr,    0x66, 0x6E, REG_DEF0),
-  { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE01,  { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxRR", "!0r,!1r" },
+  { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE1,   { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxRR", "!0r,!1r" },
   { kX86MovdrxMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxMR", "[!0r+!1d],!2r" },
   { kX86MovdrxAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
@@ -334,8 +356,8 @@
   // Encode the modrm opcode as an extra opcode byte to avoid computation during assembly.
   { kX86Mfence, kReg,                 NO_OPERAND,     { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0 }, "Mfence", "" },
 
-  EXT_0F_ENCODING_MAP(Imul16,  0x66, 0xAF, REG_DEF0 | SETS_CCODES),
-  EXT_0F_ENCODING_MAP(Imul32,  0x00, 0xAF, REG_DEF0 | SETS_CCODES),
+  EXT_0F_ENCODING_MAP(Imul16,  0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
+  EXT_0F_ENCODING_MAP(Imul32,  0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
 
   { kX86CmpxchgRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "!0r,!1r" },
   { kX86CmpxchgMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "[!0r+!1d],!2r" },
@@ -369,10 +391,10 @@
   { kX86StartOfMethod, kMacro,  IS_UNARY_OP | SETS_CCODES,             { 0, 0, 0,    0, 0, 0, 0, 0 }, "StartOfMethod", "!0r" },
   { kX86PcRelLoadRA,   kPcRel,  IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "PcRelLoadRA",   "!0r,[!1r+!2r<<!3d+!4p]" },
   { kX86PcRelAdr,      kPcRel,  IS_LOAD | IS_BINARY_OP | REG_DEF0,     { 0, 0, 0xB8, 0, 0, 0, 0, 4 }, "PcRelAdr",      "!0r,!1d" },
-  { kX86RepneScasw, kPrefix2Nullary, NO_OPERAND | SETS_CCODES,         { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0 }, "RepNE ScasW", "" },
+  { kX86RepneScasw, kPrefix2Nullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0 }, "RepNE ScasW", "" },
 };
 
-static size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib) {
+size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib) {
   size_t size = 0;
   if (entry->skeleton.prefix1 > 0) {
     ++size;
@@ -388,11 +410,13 @@
     }
   }
   ++size;  // modrm
-  if (has_sib || base == rX86_SP) {
+  if (has_sib || RegStorage::RegNum(base) == rs_rX86_SP.GetRegNum()
+      || (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) {
     // SP requires a SIB byte.
+    // GS access also needs a SIB byte for absolute addressing in 64-bit mode.
     ++size;
   }
-  if (displacement != 0 || base == rBP) {
+  if (displacement != 0 || RegStorage::RegNum(base) == rs_rBP.GetRegNum()) {
     // BP requires an explicit displacement, even when it's 0.
     if (entry->opcode != kX86Lea32RA) {
       DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), 0ULL) << entry->name;
@@ -417,14 +441,19 @@
       return 3;  // 1 byte of opcode + 2 prefixes
     case kRegOpcode:  // lir operands - 0: reg
      return ComputeSize(entry, 0, 0, false) - 1;  // subtract 1 for modrm
+    case kReg64:
     case kReg:  // lir operands - 0: reg
       return ComputeSize(entry, 0, 0, false);
     case kMem:  // lir operands - 0: base, 1: disp
       return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
     case kArray:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
       return ComputeSize(entry, lir->operands[0], lir->operands[3], true);
+    case kMemReg64:
     case kMemReg:  // lir operands - 0: base, 1: disp, 2: reg
       return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
+    case kMemRegImm:  // lir operands - 0: base, 1: disp, 2: reg 3: immediate
+      return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
+    case kArrayReg64:
     case kArrayReg:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
       return ComputeSize(entry, lir->operands[0], lir->operands[3], true);
     case kThreadReg:  // lir operands - 0: disp, 1: reg
@@ -437,8 +466,10 @@
       return ComputeSize(entry, lir->operands[1], lir->operands[2], false);
     case kRegArray:   // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
       return ComputeSize(entry, lir->operands[1], lir->operands[4], true);
+    case kReg64Thread:  // lir operands - 0: reg, 1: disp
     case kRegThread:  // lir operands - 0: reg, 1: disp
       return ComputeSize(entry, 0, 0x12345678, false);  // displacement size is always 32bit
+    case kReg64Imm:
     case kRegImm: {  // lir operands - 0: reg, 1: immediate
       size_t size = ComputeSize(entry, 0, 0, false);
       if (entry->skeleton.ax_opcode == 0) {
@@ -446,7 +477,7 @@
       } else {
         // AX opcodes don't require the modrm byte.
         int reg = lir->operands[0];
-        return size - (reg == rAX ? 1 : 0);
+        return size - (RegStorage::RegNum(reg) == rs_rAX.GetRegNum() ? 1 : 0);
       }
     }
     case kMemImm:  // lir operands - 0: base, 1: disp, 2: immediate
@@ -489,6 +520,8 @@
       return ComputeSize(entry, lir->operands[0], lir->operands[3], true);
     case kRegRegCond:  // lir operands - 0: reg, 1: reg, 2: cond
       return ComputeSize(entry, 0, 0, false);
+    case kRegMemCond:  // lir operands - 0: reg, 1: base, 2: disp, 3: cond
+      return ComputeSize(entry, lir->operands[1], lir->operands[2], false);
     case kJcc:
       if (lir->opcode == kX86Jcc8) {
         return 2;  // opcode + rel8
@@ -533,7 +566,7 @@
       DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod));
       return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ +
           ComputeSize(&X86Mir2Lir::EncodingMap[kX86Sub32RI], 0, 0, false) -
-          (lir->operands[0] == rAX  ? 1 : 0);  // shorter ax encoding
+          (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum()  ? 1 : 0);  // shorter ax encoding
     default:
       break;
   }
@@ -543,7 +576,12 @@
 
 void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry) {
   if (entry->skeleton.prefix1 != 0) {
-    code_buffer_.push_back(entry->skeleton.prefix1);
+    if (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX) {
+      // 64-bit addressing goes through GS, not FS.
+      code_buffer_.push_back(THREAD_PREFIX_GS);
+    } else {
+      code_buffer_.push_back(entry->skeleton.prefix1);
+    }
     if (entry->skeleton.prefix2 != 0) {
       code_buffer_.push_back(entry->skeleton.prefix2);
     }
@@ -574,7 +612,7 @@
 
 static uint8_t ModrmForDisp(int base, int disp) {
   // BP requires an explicit disp, so do not omit it in the 0 case
-  if (disp == 0 && base != rBP) {
+  if (disp == 0 && RegStorage::RegNum(base) != rs_rBP.GetRegNum()) {
     return 0;
   } else if (IS_SIMM8(disp)) {
     return 1;
@@ -585,7 +623,7 @@
 
 void X86Mir2Lir::EmitDisp(uint8_t base, int disp) {
   // BP requires an explicit disp, so do not omit it in the 0 case
-  if (disp == 0 && base != rBP) {
+  if (disp == 0 && RegStorage::RegNum(base) != rs_rBP.GetRegNum()) {
     return;
   } else if (IS_SIMM8(disp)) {
     code_buffer_.push_back(disp & 0xFF);
@@ -597,27 +635,42 @@
   }
 }
 
+void X86Mir2Lir::EmitModrmThread(uint8_t reg_or_opcode) {
+  if (Gen64Bit()) {
+    // Absolute addressing for GS access.
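+    // ModRM rm=0b100 selects a SIB byte; a SIB base of 0b101 with mod=0b00 encodes a 32-bit
+    // absolute displacement with no base register.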
+    uint8_t modrm = (0 << 6) | (reg_or_opcode << 3) | rs_rX86_SP.GetRegNum();
+    code_buffer_.push_back(modrm);
+    uint8_t sib = (0/*TIMES_1*/ << 6) | (rs_rX86_SP.GetRegNum() << 3) | rs_rBP.GetRegNum();
+    code_buffer_.push_back(sib);
+  } else {
+    uint8_t modrm = (0 << 6) | (reg_or_opcode << 3) | rs_rBP.GetRegNum();
+    code_buffer_.push_back(modrm);
+  }
+}
+
 void X86Mir2Lir::EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int disp) {
-  DCHECK_LT(reg_or_opcode, 8);
-  DCHECK_LT(base, 8);
-  uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (reg_or_opcode << 3) | base;
+  DCHECK_LT(RegStorage::RegNum(reg_or_opcode), 8);
+  DCHECK_LT(RegStorage::RegNum(base), 8);
+  uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (RegStorage::RegNum(reg_or_opcode) << 3) |
+     RegStorage::RegNum(base);
   code_buffer_.push_back(modrm);
-  if (base == rX86_SP) {
+  if (RegStorage::RegNum(base) == rs_rX86_SP.GetRegNum()) {
     // Special SIB for SP base
-    code_buffer_.push_back(0 << 6 | (rX86_SP << 3) | rX86_SP);
+    code_buffer_.push_back(0 << 6 | rs_rX86_SP.GetRegNum() << 3 | rs_rX86_SP.GetRegNum());
   }
   EmitDisp(base, disp);
 }
 
 void X86Mir2Lir::EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index,
                                   int scale, int disp) {
-  DCHECK_LT(reg_or_opcode, 8);
-  uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (reg_or_opcode << 3) | rX86_SP;
+  DCHECK_LT(RegStorage::RegNum(reg_or_opcode), 8);
+  uint8_t modrm = (ModrmForDisp(base, disp) << 6) | RegStorage::RegNum(reg_or_opcode) << 3 |
+      rs_rX86_SP.GetRegNum();
   code_buffer_.push_back(modrm);
   DCHECK_LT(scale, 4);
-  DCHECK_LT(index, 8);
-  DCHECK_LT(base, 8);
-  uint8_t sib = (scale << 6) | (index << 3) | base;
+  DCHECK_LT(RegStorage::RegNum(index), 8);
+  DCHECK_LT(RegStorage::RegNum(base), 8);
+  uint8_t sib = (scale << 6) | (RegStorage::RegNum(index) << 3) | RegStorage::RegNum(base);
   code_buffer_.push_back(sib);
   EmitDisp(base, disp);
 }
@@ -651,24 +704,22 @@
   // There's no 3-byte instruction with +rd
   DCHECK(entry->skeleton.opcode != 0x0F ||
          (entry->skeleton.extra_opcode1 != 0x38 && entry->skeleton.extra_opcode1 != 0x3A));
-  DCHECK(!X86_FPREG(reg));
-  DCHECK_LT(reg, 8);
-  code_buffer_.back() += reg;
+  DCHECK(!RegStorage::IsFloat(reg));
+  DCHECK_LT(RegStorage::RegNum(reg), 8);
+  code_buffer_.back() += RegStorage::RegNum(reg);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
 void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) {
   EmitPrefixAndOpcode(entry);
-  if (X86_FPREG(reg)) {
-    reg = reg & X86_FP_REG_MASK;
-  }
-  if (reg >= 4) {
-    DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << static_cast<int>(reg)
+  if (RegStorage::RegNum(reg) >= 4) {
+    DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " "
+        << static_cast<int>(RegStorage::RegNum(reg))
         << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
   }
-  DCHECK_LT(reg, 8);
-  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg;
+  DCHECK_LT(RegStorage::RegNum(reg), 8);
+  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
   code_buffer_.push_back(modrm);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
@@ -696,13 +747,10 @@
 void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry,
                        uint8_t base, int disp, uint8_t reg) {
   EmitPrefixAndOpcode(entry);
-  if (X86_FPREG(reg)) {
-    reg = reg & X86_FP_REG_MASK;
-  }
-  if (reg >= 4) {
+  if (RegStorage::RegNum(reg) >= 4) {
     DCHECK(strchr(entry->name, '8') == NULL ||
            entry->opcode == kX86Movzx8RM || entry->opcode == kX86Movsx8RM)
-        << entry->name << " " << static_cast<int>(reg)
+        << entry->name << " " << static_cast<int>(RegStorage::RegNum(reg))
         << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
   }
   EmitModrmDisp(reg, base, disp);
@@ -720,34 +768,36 @@
 void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t base, uint8_t index,
                               int scale, int disp) {
   EmitPrefixAndOpcode(entry);
-  if (X86_FPREG(reg)) {
-    reg = reg & X86_FP_REG_MASK;
-  }
   EmitModrmSibDisp(reg, base, index, scale, disp);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
-void X86Mir2Lir::EmitArrayReg(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp,
-                  uint8_t reg) {
+void X86Mir2Lir::EmitArrayReg(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale,
+                              int disp, uint8_t reg) {
   // Opcode will flip operands.
   EmitRegArray(entry, reg, base, index, scale, disp);
 }
 
+void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale,
+                              int disp, int32_t imm) {
+  EmitPrefixAndOpcode(entry);
+  EmitModrmSibDisp(entry->skeleton.modrm_opcode, base, index, scale, disp);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  EmitImm(entry, imm);
+}
+
 void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp) {
   DCHECK_NE(entry->skeleton.prefix1, 0);
   EmitPrefixAndOpcode(entry);
-  if (X86_FPREG(reg)) {
-    reg = reg & X86_FP_REG_MASK;
-  }
-  if (reg >= 4) {
-    DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << static_cast<int>(reg)
+  if (RegStorage::RegNum(reg) >= 4) {
+    DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " "
+        << static_cast<int>(RegStorage::RegNum(reg))
         << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
   }
-  DCHECK_LT(reg, 8);
-  uint8_t modrm = (0 << 6) | (reg << 3) | rBP;
-  code_buffer_.push_back(modrm);
+  DCHECK_LT(RegStorage::RegNum(reg), 8);
+  EmitModrmThread(RegStorage::RegNum(reg));
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
   code_buffer_.push_back((disp >> 16) & 0xFF);
@@ -759,15 +809,9 @@
 
 void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2) {
   EmitPrefixAndOpcode(entry);
-  if (X86_FPREG(reg1)) {
-    reg1 = reg1 & X86_FP_REG_MASK;
-  }
-  if (X86_FPREG(reg2)) {
-    reg2 = reg2 & X86_FP_REG_MASK;
-  }
-  DCHECK_LT(reg1, 8);
-  DCHECK_LT(reg2, 8);
-  uint8_t modrm = (3 << 6) | (reg1 << 3) | reg2;
+  DCHECK_LT(RegStorage::RegNum(reg1), 8);
+  DCHECK_LT(RegStorage::RegNum(reg2), 8);
+  uint8_t modrm = (3 << 6) | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2);
   code_buffer_.push_back(modrm);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
@@ -777,15 +821,9 @@
 void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry,
                           uint8_t reg1, uint8_t reg2, int32_t imm) {
   EmitPrefixAndOpcode(entry);
-  if (X86_FPREG(reg1)) {
-    reg1 = reg1 & X86_FP_REG_MASK;
-  }
-  if (X86_FPREG(reg2)) {
-    reg2 = reg2 & X86_FP_REG_MASK;
-  }
-  DCHECK_LT(reg1, 8);
-  DCHECK_LT(reg2, 8);
-  uint8_t modrm = (3 << 6) | (reg1 << 3) | reg2;
+  DCHECK_LT(RegStorage::RegNum(reg1), 8);
+  DCHECK_LT(RegStorage::RegNum(reg2), 8);
+  uint8_t modrm = (3 << 6) | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2);
   code_buffer_.push_back(modrm);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
@@ -800,31 +838,26 @@
 void X86Mir2Lir::EmitRegMemImm(const X86EncodingMap* entry,
                                uint8_t reg, uint8_t base, int disp, int32_t imm) {
   EmitPrefixAndOpcode(entry);
-  DCHECK(!X86_FPREG(reg));
-  DCHECK_LT(reg, 8);
+  DCHECK(!RegStorage::IsFloat(reg));
+  DCHECK_LT(RegStorage::RegNum(reg), 8);
   EmitModrmDisp(reg, base, disp);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   EmitImm(entry, imm);
 }
 
+void X86Mir2Lir::EmitMemRegImm(const X86EncodingMap* entry,
+                               uint8_t base, int disp, uint8_t reg, int32_t imm) {
+  EmitRegMemImm(entry, reg, base, disp, imm);
+}
+
 void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) {
-  if (entry->skeleton.prefix1 != 0) {
-    code_buffer_.push_back(entry->skeleton.prefix1);
-    if (entry->skeleton.prefix2 != 0) {
-      code_buffer_.push_back(entry->skeleton.prefix2);
-    }
-  } else {
-    DCHECK_EQ(0, entry->skeleton.prefix2);
-  }
-  if (reg == rAX && entry->skeleton.ax_opcode != 0) {
+  EmitPrefix(entry);
+  if (RegStorage::RegNum(reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) {
     code_buffer_.push_back(entry->skeleton.ax_opcode);
   } else {
     EmitOpcode(entry);
-    if (X86_FPREG(reg)) {
-      reg = reg & X86_FP_REG_MASK;
-    }
-    uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg;
+    uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
     code_buffer_.push_back(modrm);
   }
   EmitImm(entry, imm);
@@ -839,8 +872,7 @@
 
 void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int disp, int imm) {
   EmitPrefixAndOpcode(entry);
-  uint8_t modrm = (0 << 6) | (entry->skeleton.modrm_opcode << 3) | rBP;
-  code_buffer_.push_back(modrm);
+  EmitModrmThread(entry->skeleton.modrm_opcode);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
   code_buffer_.push_back((disp >> 16) & 0xFF);
@@ -850,8 +882,8 @@
 }
 
 void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) {
-  DCHECK_LT(reg, 8);
-  code_buffer_.push_back(0xB8 + reg);
+  DCHECK_LT(RegStorage::RegNum(reg), 8);
+  code_buffer_.push_back(0xB8 + RegStorage::RegNum(reg));
   code_buffer_.push_back(imm & 0xFF);
   code_buffer_.push_back((imm >> 8) & 0xFF);
   code_buffer_.push_back((imm >> 16) & 0xFF);
@@ -869,12 +901,13 @@
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  if (reg >= 4) {
-    DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << static_cast<int>(reg)
+  if (RegStorage::RegNum(reg) >= 4) {
+    DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " "
+        << static_cast<int>(RegStorage::RegNum(reg))
         << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
   }
-  DCHECK_LT(reg, 8);
-  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg;
+  DCHECK_LT(RegStorage::RegNum(reg), 8);
+  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
   code_buffer_.push_back(modrm);
   if (imm != 1) {
     DCHECK_EQ(entry->skeleton.immediate_bytes, 1);
@@ -884,14 +917,14 @@
 }
 
 void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl) {
-  DCHECK_EQ(cl, static_cast<uint8_t>(rCX));
+  DCHECK_EQ(cl, static_cast<uint8_t>(rs_rCX.GetReg()));
   EmitPrefix(entry);
   code_buffer_.push_back(entry->skeleton.opcode);
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  DCHECK_LT(reg, 8);
-  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg;
+  DCHECK_LT(RegStorage::RegNum(reg), 8);
+  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
   code_buffer_.push_back(modrm);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
@@ -899,19 +932,53 @@
 
 void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base,
                                 int displacement, uint8_t cl) {
-  DCHECK_EQ(cl, static_cast<uint8_t>(rCX));
+  DCHECK_EQ(cl, static_cast<uint8_t>(rs_rCX.GetReg()));
   EmitPrefix(entry);
   code_buffer_.push_back(entry->skeleton.opcode);
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  DCHECK_LT(base, 8);
+  DCHECK_LT(RegStorage::RegNum(base), 8);
   EmitModrmDisp(entry->skeleton.modrm_opcode, base, displacement);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
+void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base,
+                                int displacement, int imm) {
+  EmitPrefix(entry);
+  if (imm != 1) {
+    code_buffer_.push_back(entry->skeleton.opcode);
+  } else {
+    // Shorter encoding for 1 bit shift
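+    // (the entry's ax_opcode slot carries the one-bit shift form of the instruction).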
+    code_buffer_.push_back(entry->skeleton.ax_opcode);
+  }
+  DCHECK_NE(0x0F, entry->skeleton.opcode);
+  DCHECK_EQ(0, entry->skeleton.extra_opcode1);
+  DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, base, displacement);
+  if (imm != 1) {
+    DCHECK_EQ(entry->skeleton.immediate_bytes, 1);
+    DCHECK(IS_SIMM8(imm));
+    code_buffer_.push_back(imm & 0xFF);
+  }
+}
+
 void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition) {
+  EmitPrefix(entry);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  DCHECK_EQ(0x0F, entry->skeleton.opcode);
+  code_buffer_.push_back(0x0F);
+  DCHECK_EQ(0x90, entry->skeleton.extra_opcode1);
+  code_buffer_.push_back(0x90 | condition);
+  DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+  DCHECK_LT(RegStorage::RegNum(reg), 8);
+  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
+  code_buffer_.push_back(modrm);
+  DCHECK_EQ(entry->skeleton.immediate_bytes, 0);
+}
+
+void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t condition) {
   if (entry->skeleton.prefix1 != 0) {
     code_buffer_.push_back(entry->skeleton.prefix1);
     if (entry->skeleton.prefix2 != 0) {
@@ -926,13 +993,12 @@
   DCHECK_EQ(0x90, entry->skeleton.extra_opcode1);
   code_buffer_.push_back(0x90 | condition);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  DCHECK_LT(reg, 8);
-  uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg;
-  code_buffer_.push_back(modrm);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, base, displacement);
   DCHECK_EQ(entry->skeleton.immediate_bytes, 0);
 }
 
-void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, uint8_t condition) {
+void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2,
+                                uint8_t condition) {
   // Generate prefix and opcode without the condition
   EmitPrefixAndOpcode(entry);
 
@@ -945,17 +1011,35 @@
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
 
   // Check that registers requested for encoding are sane.
-  DCHECK_LT(reg1, 8);
-  DCHECK_LT(reg2, 8);
+  DCHECK_LT(RegStorage::RegNum(reg1), 8);
+  DCHECK_LT(RegStorage::RegNum(reg2), 8);
 
   // For register to register encoding, the mod is 3.
   const uint8_t mod = (3 << 6);
 
   // Encode the ModR/M byte now.
-  const uint8_t modrm = mod | (reg1 << 3) | reg2;
+  const uint8_t modrm = mod | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2);
   code_buffer_.push_back(modrm);
 }
 
+void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int displacement, uint8_t condition) {
+  // Generate prefix and opcode without the condition
+  EmitPrefixAndOpcode(entry);
+
+  // Now add the condition. The last byte of opcode is the one that receives it.
+  DCHECK_LE(condition, 0xF);
+  code_buffer_.back() += condition;
+
+  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
+
+  // Check that registers requested for encoding are sane.
+  DCHECK_LT(RegStorage::RegNum(reg1), 8);
+  DCHECK_LT(RegStorage::RegNum(base), 8);
+
+  EmitModrmDisp(reg1, base, displacement);
+}
+
 void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int rel) {
   if (entry->opcode == kX86Jmp8) {
     DCHECK(IS_SIMM8(rel));
@@ -975,8 +1059,8 @@
     DCHECK(entry->opcode == kX86JmpR);
     code_buffer_.push_back(entry->skeleton.opcode);
     uint8_t reg = static_cast<uint8_t>(rel);
-    DCHECK_LT(reg, 8);
-    uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg;
+    DCHECK_LT(RegStorage::RegNum(reg), 8);
+    uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
     code_buffer_.push_back(modrm);
   }
 }
@@ -1018,8 +1102,7 @@
 void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int disp) {
   DCHECK_NE(entry->skeleton.prefix1, 0);
   EmitPrefixAndOpcode(entry);
-  uint8_t modrm = (0 << 6) | (entry->skeleton.modrm_opcode << 3) | rBP;
-  code_buffer_.push_back(modrm);
+  EmitModrmThread(entry->skeleton.modrm_opcode);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
   code_buffer_.push_back((disp >> 16) & 0xFF);
@@ -1042,26 +1125,23 @@
     disp = tab_rec->offset;
   }
   EmitPrefix(entry);
-  if (X86_FPREG(reg)) {
-    reg = reg & X86_FP_REG_MASK;
-  }
-  DCHECK_LT(reg, 8);
+  DCHECK_LT(RegStorage::RegNum(reg), 8);
   if (entry->opcode == kX86PcRelLoadRA) {
     code_buffer_.push_back(entry->skeleton.opcode);
     DCHECK_NE(0x0F, entry->skeleton.opcode);
     DCHECK_EQ(0, entry->skeleton.extra_opcode1);
     DCHECK_EQ(0, entry->skeleton.extra_opcode2);
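+    // mod=0b10 with rm=0b100 (ESP): a SIB byte and a 32-bit displacement follow.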
-    uint8_t modrm = (2 << 6) | (reg << 3) | rX86_SP;
+    uint8_t modrm = (2 << 6) | (RegStorage::RegNum(reg) << 3) | rs_rX86_SP.GetRegNum();
     code_buffer_.push_back(modrm);
     DCHECK_LT(scale, 4);
-    DCHECK_LT(index, 8);
-    DCHECK_LT(base_or_table, 8);
+    DCHECK_LT(RegStorage::RegNum(index), 8);
+    DCHECK_LT(RegStorage::RegNum(base_or_table), 8);
     uint8_t base = static_cast<uint8_t>(base_or_table);
-    uint8_t sib = (scale << 6) | (index << 3) | base;
+    uint8_t sib = (scale << 6) | (RegStorage::RegNum(index) << 3) | RegStorage::RegNum(base);
     code_buffer_.push_back(sib);
     DCHECK_EQ(0, entry->skeleton.immediate_bytes);
   } else {
-    code_buffer_.push_back(entry->skeleton.opcode + reg);
+    code_buffer_.push_back(entry->skeleton.opcode + RegStorage::RegNum(reg));
   }
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
@@ -1079,10 +1159,11 @@
   code_buffer_.push_back(0);
   code_buffer_.push_back(0);
 
-  DCHECK_LT(reg, 8);
-  code_buffer_.push_back(0x58 + reg);  // pop reg
+  DCHECK_LT(RegStorage::RegNum(reg), 8);
+  code_buffer_.push_back(0x58 + RegStorage::RegNum(reg));  // pop reg
 
-  EmitRegImm(&X86Mir2Lir::EncodingMap[kX86Sub32RI], reg, offset + 5 /* size of call +0 */);
+  EmitRegImm(&X86Mir2Lir::EncodingMap[kX86Sub32RI], RegStorage::RegNum(reg),
+             offset + 5 /* size of call +0 */);
 }
 
 void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) {
@@ -1262,6 +1343,7 @@
       case kRegOpcode:  // lir operands - 0: reg
         EmitOpRegOpcode(entry, lir->operands[0]);
         break;
+      case kReg64:
       case kReg:  // lir operands - 0: reg
         EmitOpReg(entry, lir->operands[0]);
         break;
@@ -1271,12 +1353,18 @@
       case kArray:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
         EmitOpArray(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3]);
         break;
+      case kMemReg64:
       case kMemReg:  // lir operands - 0: base, 1: disp, 2: reg
         EmitMemReg(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
         break;
       case kMemImm:  // lir operands - 0: base, 1: disp, 2: immediate
         EmitMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
         break;
+      case kArrayImm:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: immediate
+        EmitArrayImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
+                     lir->operands[3], lir->operands[4]);
+        break;
+      case kArrayReg64:
       case kArrayReg:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
         EmitArrayReg(entry, lir->operands[0], lir->operands[1], lir->operands[2],
                      lir->operands[3], lir->operands[4]);
@@ -1288,6 +1376,7 @@
         EmitRegArray(entry, lir->operands[0], lir->operands[1], lir->operands[2],
                      lir->operands[3], lir->operands[4]);
         break;
+      case kReg64Thread:  // lir operands - 0: reg, 1: disp
       case kRegThread:  // lir operands - 0: reg, 1: disp
         EmitRegThread(entry, lir->operands[0], lir->operands[1]);
         break;
@@ -1300,6 +1389,10 @@
       case kRegRegImmRev:
         EmitRegRegImmRev(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
         break;
+      case kMemRegImm:
+        EmitMemRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
+                      lir->operands[3]);
+        break;
       case kRegRegImm:
         EmitRegRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
         break;
@@ -1307,6 +1400,7 @@
         EmitRegMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
                       lir->operands[3]);
         break;
+      case kReg64Imm:
       case kRegImm:  // lir operands - 0: reg, 1: immediate
         EmitRegImm(entry, lir->operands[0], lir->operands[1]);
         break;
@@ -1319,6 +1413,9 @@
       case kShiftRegImm:  // lir operands - 0: reg, 1: immediate
         EmitShiftRegImm(entry, lir->operands[0], lir->operands[1]);
         break;
+      case kShiftMemImm:  // lir operands - 0: base, 1: disp, 2: immediate
+        EmitShiftMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
+        break;
       case kShiftRegCl:  // lir operands - 0: reg, 1: cl
         EmitShiftRegCl(entry, lir->operands[0], lir->operands[1]);
         break;
@@ -1328,9 +1425,15 @@
       case kRegCond:  // lir operands - 0: reg, 1: condition
         EmitRegCond(entry, lir->operands[0], lir->operands[1]);
         break;
+      case kMemCond:  // lir operands - 0: base, 1: displacement, 2: condition
+        EmitMemCond(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
+        break;
       case kRegRegCond:  // lir operands - 0: reg, 1: reg, 2: condition
         EmitRegRegCond(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
         break;
+      case kRegMemCond:  // lir operands - 0: reg, 1: base, 2: displacement, 3: condition
+        EmitRegMemCond(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3]);
+        break;
       case kJmp:  // lir operands - 0: rel
         if (entry->opcode == kX86JmpT) {
           // This works since the instruction format for jmp and call is basically the same and
@@ -1410,8 +1513,28 @@
 void X86Mir2Lir::AssignOffsets() {
   int offset = AssignInsnOffsets();
 
+  if (const_vectors_ != nullptr) {
+    /* assign offsets to vector literals */
+
+    // First, get offset to 12 mod 16 to align to a 16-byte boundary.
+    // This ensures that the vector literals are 16-byte aligned, as the procedure
+    // itself is always aligned at 4 mod 16.
+    int align_size = (16-4) - (offset & 0xF);
+    if (align_size < 0) {
+      align_size += 16;
+    }
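+    // e.g. an offset of 0x22 gives align_size == 10, moving offset to 0x2C (12 mod 16).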
+
+    offset += align_size;
+
+    // Now assign each literal the right offset.
+    for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
+      p->offset = offset;
+      offset += 16;
+    }
+  }
+
   /* Const values have to be word aligned */
-  offset = (offset + 3) & ~3;
+  offset = RoundUp(offset, 4);
 
   /* Set up offsets for literals */
   data_offset_ = offset;
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 06cc861..4673cc0 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -152,16 +152,23 @@
     LoadValueDirect(rl_method, rs_rX86_ARG2);
     store_method_addr_used_ = true;
   } else {
-    NewLIR1(kX86StartOfMethod, rX86_ARG2);
+    NewLIR1(kX86StartOfMethod, rs_rX86_ARG2.GetReg());
   }
-  NewLIR2(kX86PcRelAdr, rX86_ARG1, WrapPointer(tab_rec));
-  NewLIR2(kX86Add32RR, rX86_ARG1, rX86_ARG2);
-  CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData), rs_rX86_ARG0,
-                          rs_rX86_ARG1, true);
+  NewLIR2(kX86PcRelAdr, rs_rX86_ARG1.GetReg(), WrapPointer(tab_rec));
+  NewLIR2(kX86Add32RR, rs_rX86_ARG1.GetReg(), rs_rX86_ARG2.GetReg());
+  if (Is64BitInstructionSet(cu_->instruction_set)) {
+    CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData), rs_rX86_ARG0,
+                            rs_rX86_ARG1, true);
+  } else {
+    CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData), rs_rX86_ARG0,
+                            rs_rX86_ARG1, true);
+  }
 }
 
 void X86Mir2Lir::GenMoveException(RegLocation rl_dest) {
-  int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
+  int ex_offset = Is64BitInstructionSet(cu_->instruction_set) ?
+      Thread::ExceptionOffset<8>().Int32Value() :
+      Thread::ExceptionOffset<4>().Int32Value();
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   NewLIR2(kX86Mov32RT, rl_result.reg.GetReg(), ex_offset);
   NewLIR2(kX86Mov32TI, ex_offset, 0);
@@ -175,7 +182,14 @@
   RegStorage reg_card_base = AllocTemp();
   RegStorage reg_card_no = AllocTemp();
   LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
-  NewLIR2(kX86Mov32RT, reg_card_base.GetReg(), Thread::CardTableOffset<4>().Int32Value());
+  int ct_offset = Is64BitInstructionSet(cu_->instruction_set) ?
+      Thread::CardTableOffset<8>().Int32Value() :
+      Thread::CardTableOffset<4>().Int32Value();
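+  // The card table base is read out of thread-local storage; a 64-bit target needs a 64-bit move.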
+  if (Gen64Bit()) {
+    NewLIR2(kX86Mov64RT, reg_card_base.GetReg(), ct_offset);
+  } else {
+    NewLIR2(kX86Mov32RT, reg_card_base.GetReg(), ct_offset);
+  }
   OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
   StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
   LIR* target = NewLIR0(kPseudoTargetLabel);
@@ -191,13 +205,12 @@
    * expanding the frame or flushing.  This leaves the utility
    * code with no spare temps.
    */
-  LockTemp(rX86_ARG0);
-  LockTemp(rX86_ARG1);
-  LockTemp(rX86_ARG2);
+  LockTemp(rs_rX86_ARG0);
+  LockTemp(rs_rX86_ARG1);
+  LockTemp(rs_rX86_ARG2);
 
   /* Build frame, return address already on stack */
-  // TODO: 64 bit.
-  stack_decrement_ = OpRegImm(kOpSub, rs_rX86_SP, frame_size_ - 4);
+  stack_decrement_ = OpRegImm(kOpSub, rs_rX86_SP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
 
   /*
    * We can safely skip the stack overflow check if we're
@@ -222,10 +235,14 @@
         GenerateTargetLabel(kPseudoThrowTarget);
         m2l_->OpRegImm(kOpAdd, rs_rX86_SP, sp_displace_);
         m2l_->ClobberCallerSave();
-        ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow);
         // Assumes codegen and target are in thumb2 mode.
-        m2l_->CallHelper(RegStorage::InvalidReg(), func_offset, false /* MarkSafepointPC */,
-                         false /* UseLink */);
+        if (Is64BitInstructionSet(cu_->instruction_set)) {
+          m2l_->CallHelper(RegStorage::InvalidReg(), QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow),
+                           false /* MarkSafepointPC */, false /* UseLink */);
+        } else {
+          m2l_->CallHelper(RegStorage::InvalidReg(), QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow),
+                           false /* MarkSafepointPC */, false /* UseLink */);
+        }
       }
 
      private:
@@ -239,25 +256,31 @@
     // mov esp, ebp
     // in case a signal comes in that's not using an alternate signal stack and the large frame may
     // have moved us outside of the reserved area at the end of the stack.
-    // cmp rX86_SP, fs:[stack_end_]; jcc throw_slowpath
-    OpRegThreadMem(kOpCmp, rX86_SP, Thread::StackEndOffset<4>());
+    // cmp rs_rX86_SP, fs:[stack_end_]; jcc throw_slowpath
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<8>());
+    } else {
+      OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<4>());
+    }
     LIR* branch = OpCondBranch(kCondUlt, nullptr);
-    AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, frame_size_ - 4));
+    AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch,
+                                                 frame_size_ -
+                                                 GetInstructionSetPointerSize(cu_->instruction_set)));
   }
 
   FlushIns(ArgLocs, rl_method);
 
   if (base_of_code_ != nullptr) {
     // We have been asked to save the address of the method start for later use.
-    setup_method_address_[0] = NewLIR1(kX86StartOfMethod, rX86_ARG0);
+    setup_method_address_[0] = NewLIR1(kX86StartOfMethod, rs_rX86_ARG0.GetReg());
     int displacement = SRegOffset(base_of_code_->s_reg_low);
     // Native pointer - must be natural word size.
     setup_method_address_[1] = StoreWordDisp(rs_rX86_SP, displacement, rs_rX86_ARG0);
   }
 
-  FreeTemp(rX86_ARG0);
-  FreeTemp(rX86_ARG1);
-  FreeTemp(rX86_ARG2);
+  FreeTemp(rs_rX86_ARG0);
+  FreeTemp(rs_rX86_ARG1);
+  FreeTemp(rs_rX86_ARG2);
 }
 
 void X86Mir2Lir::GenExitSequence() {
@@ -265,13 +288,13 @@
    * In the exit path, rX86_RET0/rX86_RET1 are live - make sure they aren't
    * allocated by the register utilities as temps.
    */
-  LockTemp(rX86_RET0);
-  LockTemp(rX86_RET1);
+  LockTemp(rs_rX86_RET0);
+  LockTemp(rs_rX86_RET1);
 
   NewLIR0(kPseudoMethodExit);
   UnSpillCoreRegs();
   /* Remove frame except for return address */
-  stack_increment_ = OpRegImm(kOpAdd, rs_rX86_SP, frame_size_ - 4);
+  stack_increment_ = OpRegImm(kOpAdd, rs_rX86_SP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
   NewLIR0(kX86Ret);
 }
 
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 760290c..72cdbbd 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -22,42 +22,38 @@
 
 namespace art {
 
-class X86Mir2Lir FINAL : public Mir2Lir {
+class X86Mir2Lir : public Mir2Lir {
   public:
-    X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
+    X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit);
 
     // Required for target - codegen helpers.
     bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
                             RegLocation rl_dest, int lit);
     bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
     LIR* CheckSuspendUsingLoad() OVERRIDE;
-    RegStorage LoadHelper(ThreadOffset<4> offset);
-    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
-                      int s_reg);
-    LIR* LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest, int s_reg);
+    RegStorage LoadHelper(ThreadOffset<4> offset) OVERRIDE;
+    RegStorage LoadHelper(ThreadOffset<8> offset) OVERRIDE;
+    LIR* LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest,
+                              OpSize size) OVERRIDE;
+    LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+                      OpSize size) OVERRIDE;
     LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
-                         OpSize size);
-    // TODO: collapse r_dest, r_dest_hi
+                         OpSize size) OVERRIDE;
     LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                             RegStorage r_dest, RegStorage r_dest_hi, OpSize size, int s_reg);
+                             RegStorage r_dest, OpSize size) OVERRIDE;
     LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
     LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
-    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
-    LIR* StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src);
+    LIR* StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src,
+                               OpSize size) OVERRIDE;
+    LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+                       OpSize size) OVERRIDE;
     LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
-                          OpSize size);
-    // TODO: collapse r_src, r_src_hi
+                          OpSize size) OVERRIDE;
     LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                              RegStorage r_src, RegStorage r_src_hi, OpSize size, int s_reg);
+                              RegStorage r_src, OpSize size) OVERRIDE;
     void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
 
     // Required for target - register utilities.
-    bool IsFpReg(int reg);
-    bool IsFpReg(RegStorage reg);
-    bool SameRegType(int reg1, int reg2);
-    RegStorage AllocTypedTemp(bool fp_hint, int reg_class);
-    RegStorage AllocTypedTempWide(bool fp_hint, int reg_class);
-    int S2d(int low_reg, int high_reg);
     RegStorage TargetReg(SpecialTargetRegister reg);
     RegStorage GetArgMappingToPhysicalReg(int arg_num);
     RegLocation GetReturnAlt();
@@ -66,16 +62,13 @@
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
-    uint32_t FpRegMask();
-    uint64_t GetRegMaskCommon(int reg);
+    uint64_t GetRegMaskCommon(RegStorage reg);
     void AdjustSpillMask();
     void ClobberCallerSave();
-    void FlushReg(RegStorage reg);
-    void FlushRegWide(RegStorage reg);
     void FreeCallTemps();
-    void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free);
     void LockCallTemps();
-    void MarkPreservedSingle(int v_reg, int reg);
+    void MarkPreservedSingle(int v_reg, RegStorage reg);
+    void MarkPreservedDouble(int v_reg, RegStorage reg);
     void CompilerInitializeRegAlloc();
 
     // Required for target - miscellaneous.
@@ -93,6 +86,11 @@
     int GetInsnSize(LIR* lir);
     bool IsUnconditionalBranch(LIR* lir);
 
+    // Check support for volatile load/store of a given size.
+    bool SupportsVolatileLoadStore(OpSize size) OVERRIDE;
+    // Get the register class for load/store of a field.
+    RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
+
     // Required for target - Dalvik-level generators.
     void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2);
@@ -120,6 +118,7 @@
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
+    void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
     void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                    RegLocation rl_src2);
@@ -127,6 +126,8 @@
                     RegLocation rl_src2);
     void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                     RegLocation rl_src2);
+    void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
+                       RegLocation rl_src2, bool is_div);
     // TODO: collapse reg_lo, reg_hi
     RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
     RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
@@ -141,7 +142,7 @@
     void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
     void GenSelect(BasicBlock* bb, MIR* mir);
-    void GenMemBarrier(MemBarrierKind barrier_kind);
+    bool GenMemBarrier(MemBarrierKind barrier_kind);
     void GenMoveException(RegLocation rl_dest);
     void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
                                        int first_bit, int second_bit);
@@ -175,8 +176,8 @@
       * @param op The DEX opcode for the operation.
       * @param is_commutative The sources can be swapped if needed.
       */
-    void GenLongArith(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                      Instruction::Code op, bool is_commutative);
+    virtual void GenLongArith(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+                              Instruction::Code op, bool is_commutative);
 
     /**
       * @brief Generate a two operand long arithmetic operation.
@@ -192,7 +193,7 @@
       * @param rl_src The other operand.  May be in a register or in memory.
       * @param op The DEX opcode for the operation.
       */
-    void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+    virtual void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
 
     /**
      * @brief Implement instanceof a final class with x86 specific code.
@@ -245,14 +246,17 @@
     LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value);
     LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2);
     LIR* OpTestSuspend(LIR* target);
-    LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset);
+    LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) OVERRIDE;
+    LIR* OpThreadMem(OpKind op, ThreadOffset<8> thread_offset) OVERRIDE;
     LIR* OpVldm(RegStorage r_base, int count);
     LIR* OpVstm(RegStorage r_base, int count);
     void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset);
     void OpRegCopyWide(RegStorage dest, RegStorage src);
-    void OpTlsCmp(ThreadOffset<4> offset, int val);
+    void OpTlsCmp(ThreadOffset<4> offset, int val) OVERRIDE;
+    void OpTlsCmp(ThreadOffset<8> offset, int val) OVERRIDE;
 
-    void OpRegThreadMem(OpKind op, int r_dest, ThreadOffset<4> thread_offset);
+    void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset);
+    void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset);
     void SpillCoreRegs();
     void UnSpillCoreRegs();
     static const X86EncodingMap EncodingMap[kX86Last];
@@ -261,11 +265,11 @@
     bool InexpensiveConstantLong(int64_t value);
     bool InexpensiveConstantDouble(int64_t value);
 
-    RegLocation UpdateLocWide(RegLocation loc);
-    RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update);
-    RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);
-    RegStorage AllocTempDouble();
-    void ResetDefLocWide(RegLocation rl);
+    /*
+     * @brief Should we try to optimize for two-address instructions?
+     * @return true if we try to avoid generating three-operand instructions.
+     */
+    virtual bool GenerateTwoOperandInstructions() const { return true; }
 
     /*
      * @brief x86 specific codegen for int operations.
@@ -307,7 +311,7 @@
      * @param type How the method will be invoked.
      * @returns Call instruction
      */
-    LIR * CallWithLinkerFixup(const MethodReference& target_method, InvokeType type);
+    virtual LIR * CallWithLinkerFixup(const MethodReference& target_method, InvokeType type);
 
     /*
      * @brief Handle x86 specific literals
@@ -326,11 +330,13 @@
      */
     std::vector<uint8_t>* ReturnCallFrameInformation();
 
-  private:
+  protected:
+    size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib);
     void EmitPrefix(const X86EncodingMap* entry);
     void EmitOpcode(const X86EncodingMap* entry);
     void EmitPrefixAndOpcode(const X86EncodingMap* entry);
     void EmitDisp(uint8_t base, int disp);
+    void EmitModrmThread(uint8_t reg_or_opcode);
     void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int disp);
     void EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index, int scale, int disp);
     void EmitImm(const X86EncodingMap* entry, int imm);
@@ -345,19 +351,24 @@
                       int scale, int disp);
     void EmitArrayReg(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp,
                       uint8_t reg);
+    void EmitArrayImm(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp,
+                      int32_t imm);
     void EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp);
     void EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2);
     void EmitRegRegImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm);
     void EmitRegRegImmRev(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm);
     void EmitRegMemImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int disp,
                        int32_t imm);
+    void EmitMemRegImm(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg1, int32_t imm);
     void EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
     void EmitThreadImm(const X86EncodingMap* entry, int disp, int imm);
     void EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
     void EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
+    void EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base, int disp, int imm);
     void EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t cl);
     void EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl);
     void EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition);
+    void EmitMemCond(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t condition);
 
     /**
      * @brief Used for encoding conditional register to register operation.
@@ -368,6 +379,16 @@
      */
     void EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, uint8_t condition);
 
+    /**
+     * @brief Used for encoding a conditional register-to-memory operation.
+     * @param entry The entry in the encoding map for the opcode.
+     * @param reg1 The first physical register.
+     * @param base The memory base register.
+     * @param displacement The memory displacement.
+     * @param condition The condition code for the operation.
+     */
+    void EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int displacement, uint8_t condition);
+
     void EmitJmp(const X86EncodingMap* entry, int rel);
     void EmitJcc(const X86EncodingMap* entry, int rel, uint8_t cc);
     void EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp);
@@ -379,12 +400,17 @@
     void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir);
     void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
                                   int64_t val, ConditionCode ccode);
-    void OpVectorRegCopyWide(uint8_t fp_reg, uint8_t low_reg, uint8_t high_reg);
     void GenConstWide(RegLocation rl_dest, int64_t value);
 
     static bool ProvidesFullMemoryBarrier(X86OpCode opcode);
 
     /*
+     * @brief Ensure that a temporary register is byte addressable.
+     * @returns a temporary guaranteed to be byte addressable.
+     */
+    virtual RegStorage AllocateByteRegister();
+
+    /*
     * @brief Generate inline code for the fast case of String.indexOf.
      * @param info Call parameters
      * @param zero_based 'true' if the index into the string is 0.
@@ -394,6 +420,22 @@
     bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
 
     /*
+     * @brief Load 128 bit constant into vector register.
+     * @param bb The basic block in which the MIR is from.
+     * @param mir The MIR whose opcode is kMirConstVector
+     * @note vA is the TypeSize for the register.
+     * @note vB is the destination XMM register. arg[0..3] are 32 bit constant values.
+     */
+    void GenConst128(BasicBlock* bb, MIR* mir);
+
+    /*
+     * @brief Generate code for a vector opcode.
+     * @param bb The basic block in which the MIR is from.
+     * @param mir The MIR whose opcode is a non-standard opcode.
+     */
+    void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir);
+
+    /*
      * @brief Return the correct x86 opcode for the Dex operation
      * @param op Dex opcode for the operation
      * @param loc Register location of the operand
@@ -504,7 +546,7 @@
      * @param rl_src The source of the long.
      * @param is_double 'true' if dealing with double, 'false' for float.
      */
-    void GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double);
+    virtual void GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double);
 
     /*
      * @brief Perform MIR analysis before compiling method.
@@ -513,6 +555,19 @@
     void Materialize();
 
     /*
+     * Mir2Lir's UpdateLoc() looks to see if the Dalvik value is currently live in any temp register
+     * without regard to data type.  In practice, this can result in UpdateLoc returning a
+     * location record for a Dalvik float value in a core register, and vice versa.  For targets
+     * which can inexpensively move data between core and float registers, this can often be a win.
+     * However, for x86 this is generally not a win.  These variants of UpdateLoc()
+     * take a register class argument - and will return an in-register location record only if
+     * the value is live in a temp register of the correct class.  Additionally, if the value is in
+     * a temp register of the wrong register class, it will be clobbered.
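+     * For example, the wide arithmetic helpers below call UpdateLocWideTyped(loc, kCoreReg) so
+     * that a wide value currently cached in an XMM register is not handed back as a core pair.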
+     */
+    RegLocation UpdateLocTyped(RegLocation loc, int reg_class);
+    RegLocation UpdateLocWideTyped(RegLocation loc, int reg_class);
+
+    /*
      * @brief Analyze MIR before generating code, to prepare for the code generation.
      */
     void AnalyzeMIR();
@@ -537,7 +592,7 @@
      * @param bb Basic block containing instruction.
      * @param mir Instruction to analyze.
      */
-    void AnalyzeMIR(int opcode, BasicBlock * bb, MIR *mir);
+    virtual void AnalyzeMIR(int opcode, BasicBlock * bb, MIR *mir);
 
     /*
      * @brief Analyze one MIR float/double instruction
@@ -553,6 +608,8 @@
      */
     void AnalyzeDoubleUse(RegLocation rl_use);
 
+    bool Gen64Bit() const { return gen64bit_; }
+
     // Information derived from analysis of MIR
 
     // The compiler temporary for the code address of the method.
@@ -581,6 +638,25 @@
 
     // Epilogue increment of stack pointer.
     LIR* stack_increment_;
+
+    // 64-bit mode
+    bool gen64bit_;
+
+    // The list of const vector literals.
+    LIR *const_vectors_;
+
+    /*
+     * @brief Search for a matching vector literal
+     * @param mir A kMirOpConst128b MIR instruction to match.
+     * @returns pointer to matching LIR constant, or nullptr if not found.
+     */
+    LIR *ScanVectorLiteral(MIR *mir);
+
+    /*
+     * @brief Add a constant vector literal
+     * @param mir A kMirOpConst128b MIR instruction holding the constant to add.
+     */
+    LIR *AddVectorLiteral(MIR *mir);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index f7b0c9d..aec39ab 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -49,8 +49,13 @@
     case Instruction::REM_FLOAT_2ADDR:
     case Instruction::REM_FLOAT:
       FlushAllRegs();   // Send everything to home location
-      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
-                                              false);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2,
+                                                false);
+      } else {
+        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
+                                                false);
+      }
       rl_result = GetReturn(true);
       StoreValue(rl_dest, rl_result);
       return;
@@ -67,7 +72,7 @@
   RegStorage r_src1 = rl_src1.reg;
   RegStorage r_src2 = rl_src2.reg;
   if (r_dest == r_src2) {
-    r_src2 = AllocTempFloat();
+    r_src2 = AllocTempSingle();
     OpRegCopy(r_src2, r_dest);
   }
   OpRegCopy(r_dest, r_src1);
@@ -77,6 +82,12 @@
 
 void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
+  DCHECK(rl_dest.wide);
+  DCHECK(rl_dest.fp);
+  DCHECK(rl_src1.wide);
+  DCHECK(rl_src1.fp);
+  DCHECK(rl_src2.wide);
+  DCHECK(rl_src2.fp);
   X86OpCode op = kX86Nop;
   RegLocation rl_result;
 
@@ -100,8 +111,13 @@
     case Instruction::REM_DOUBLE_2ADDR:
     case Instruction::REM_DOUBLE:
       FlushAllRegs();   // Send everything to home location
-      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
-                                              false);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmod), rl_src1, rl_src2,
+                                                false);
+      } else {
+        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
+                                                false);
+      }
       rl_result = GetReturnWide(true);
       StoreValueWide(rl_dest, rl_result);
       return;
@@ -112,22 +128,14 @@
       LOG(FATAL) << "Unexpected opcode: " << opcode;
   }
   rl_src1 = LoadValueWide(rl_src1, kFPReg);
-  DCHECK(rl_src1.wide);
   rl_src2 = LoadValueWide(rl_src2, kFPReg);
-  DCHECK(rl_src2.wide);
   rl_result = EvalLoc(rl_dest, kFPReg, true);
-  DCHECK(rl_dest.wide);
-  DCHECK(rl_result.wide);
-  // TODO: update with direct 64-bit reg.
-  int r_dest = S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg());
-  int r_src1 = S2d(rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg());
-  int r_src2 = S2d(rl_src2.reg.GetLowReg(), rl_src2.reg.GetHighReg());
-  if (r_dest == r_src2) {
-    r_src2 = AllocTempDouble().GetLowReg() | X86_FP_DOUBLE;
-    OpRegCopy(RegStorage::Solo64(r_src2), RegStorage::Solo64(r_dest));
+  if (rl_result.reg == rl_src2.reg) {
+    rl_src2.reg = AllocTempDouble();
+    OpRegCopy(rl_src2.reg, rl_result.reg);
   }
-  OpRegCopy(RegStorage::Solo64(r_dest), RegStorage::Solo64(r_src1));
-  NewLIR2(op, r_dest, r_src2);
+  OpRegCopy(rl_result.reg, rl_src1.reg);
+  NewLIR2(op, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
   StoreValueWide(rl_dest, rl_result);
 }
 
@@ -141,27 +149,25 @@
 
   // If the source is in physical register, then put it in its location on stack.
   if (rl_src.location == kLocPhysReg) {
-    RegisterInfo* lo_info = GetRegInfo(rl_src.reg.GetLowReg());
+    RegisterInfo* reg_info = GetRegInfo(rl_src.reg);
 
-    if (lo_info != nullptr && lo_info->is_temp) {
+    if (reg_info != nullptr && reg_info->IsTemp()) {
       // Calling FlushSpecificReg because it will only write back VR if it is dirty.
-      FlushSpecificReg(lo_info);
-      // ResetDef for low/high to prevent NullifyRange from removing stores.
-      ResetDef(rl_src.reg.GetLowReg());
-      if (rl_src.reg.GetLowReg() != rl_src.reg.GetHighReg() && GetRegInfo(rl_src.reg.GetHighReg()) != nullptr) {
-        ResetDef(rl_src.reg.GetHighReg());
-      }
+      FlushSpecificReg(reg_info);
+      // ResetDef to prevent NullifyRange from removing stores.
+      ResetDef(rl_src.reg);
     } else {
       // It must have been register promoted if it is not a temp but is still in physical
       // register. Since we need it to be in memory to convert, we place it there now.
-      StoreBaseDispWide(TargetReg(kSp), src_v_reg_offset, rl_src.reg);
+      StoreBaseDisp(TargetReg(kSp), src_v_reg_offset, rl_src.reg, k64);
     }
   }
 
   // Push the source virtual register onto the x87 stack.
-  LIR *fild64 = NewLIR2NoDest(kX86Fild64M, TargetReg(kSp).GetReg(), src_v_reg_offset + LOWORD_OFFSET);
+  LIR *fild64 = NewLIR2NoDest(kX86Fild64M, TargetReg(kSp).GetReg(),
+                              src_v_reg_offset + LOWORD_OFFSET);
   AnnotateDalvikRegAccess(fild64, (src_v_reg_offset + LOWORD_OFFSET) >> 2,
-      true /* is_load */, true /* is64bit */);
+                          true /* is_load */, true /* is64bit */);
 
   // Now pop off x87 stack and store it in the destination VR's stack location.
   int opcode = is_double ? kX86Fstp64M : kX86Fstp32M;
@@ -177,7 +183,8 @@
    * If the result's location is in memory, then we do not need to do anything
    * more since the fstp has already placed the correct value in memory.
    */
-  RegLocation rl_result = is_double ? UpdateLocWide(rl_dest) : UpdateLoc(rl_dest);
+  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest, kFPReg) :
+      UpdateLocTyped(rl_dest, kFPReg);
   if (rl_result.location == kLocPhysReg) {
     /*
      * We already know that the result is in a physical register but do not know if it is the
@@ -187,7 +194,7 @@
     if (is_double) {
       rl_result = EvalLocWide(rl_dest, kFPReg, true);
 
-      LoadBaseDispWide(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, INVALID_SREG);
+      LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64);
 
       StoreFinalValueWide(rl_dest, rl_result);
     } else {
@@ -204,7 +211,6 @@
                                RegLocation rl_src) {
   RegisterClass rcSrc = kFPReg;
   X86OpCode op = kX86Nop;
-  int src_reg;
   RegLocation rl_result;
   switch (opcode) {
     case Instruction::INT_TO_FLOAT:
@@ -225,18 +231,17 @@
       break;
     case Instruction::FLOAT_TO_INT: {
       rl_src = LoadValue(rl_src, kFPReg);
-      src_reg = rl_src.reg.GetReg();
       // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
       ClobberSReg(rl_dest.s_reg_low);
       rl_result = EvalLoc(rl_dest, kCoreReg, true);
-      int temp_reg = AllocTempFloat().GetReg();
+      RegStorage temp_reg = AllocTempSingle();
 
       LoadConstant(rl_result.reg, 0x7fffffff);
-      NewLIR2(kX86Cvtsi2ssRR, temp_reg, rl_result.reg.GetReg());
-      NewLIR2(kX86ComissRR, src_reg, temp_reg);
+      NewLIR2(kX86Cvtsi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
+      NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
       LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
       LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
-      NewLIR2(kX86Cvttss2siRR, rl_result.reg.GetReg(), src_reg);
+      NewLIR2(kX86Cvttss2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
       LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
       branch_na_n->target = NewLIR0(kPseudoTargetLabel);
       NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
@@ -247,18 +252,17 @@
     }
     case Instruction::DOUBLE_TO_INT: {
       rl_src = LoadValueWide(rl_src, kFPReg);
-      src_reg = rl_src.reg.GetLowReg();
       // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
       ClobberSReg(rl_dest.s_reg_low);
       rl_result = EvalLoc(rl_dest, kCoreReg, true);
-      int temp_reg = AllocTempDouble().GetLowReg() | X86_FP_DOUBLE;
+      RegStorage temp_reg = AllocTempDouble();
 
       LoadConstant(rl_result.reg, 0x7fffffff);
-      NewLIR2(kX86Cvtsi2sdRR, temp_reg, rl_result.reg.GetReg());
-      NewLIR2(kX86ComisdRR, src_reg, temp_reg);
+      NewLIR2(kX86Cvtsi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
+      NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
       LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
       LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
-      NewLIR2(kX86Cvttsd2siRR, rl_result.reg.GetReg(), src_reg);
+      NewLIR2(kX86Cvttsd2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
       LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
       branch_na_n->target = NewLIR0(kPseudoTargetLabel);
       NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
@@ -274,28 +278,34 @@
       GenLongToFP(rl_dest, rl_src, false /* is_double */);
       return;
     case Instruction::FLOAT_TO_LONG:
-      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(8, pF2l), rl_dest, rl_src);
+      } else {
+        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src);
+      }
       return;
     case Instruction::DOUBLE_TO_LONG:
-      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(8, pD2l), rl_dest, rl_src);
+      } else {
+        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src);
+      }
       return;
     default:
       LOG(INFO) << "Unexpected opcode: " << opcode;
   }
+  // At this point, target will be either float or double.
+  DCHECK(rl_dest.fp);
   if (rl_src.wide) {
     rl_src = LoadValueWide(rl_src, rcSrc);
-    src_reg = S2d(rl_src.reg.GetLowReg(), rl_src.reg.GetHighReg());
   } else {
     rl_src = LoadValue(rl_src, rcSrc);
-    src_reg = rl_src.reg.GetReg();
   }
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
   if (rl_dest.wide) {
-    rl_result = EvalLoc(rl_dest, kFPReg, true);
-    NewLIR2(op, S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()), src_reg);
     StoreValueWide(rl_dest, rl_result);
   } else {
-    rl_result = EvalLoc(rl_dest, kFPReg, true);
-    NewLIR2(op, rl_result.reg.GetReg(), src_reg);
     StoreValue(rl_dest, rl_result);
   }
 }
@@ -304,34 +314,28 @@
                           RegLocation rl_src1, RegLocation rl_src2) {
   bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT);
   bool unordered_gt = (code == Instruction::CMPG_DOUBLE) || (code == Instruction::CMPG_FLOAT);
-  int src_reg1;
-  int src_reg2;
   if (single) {
     rl_src1 = LoadValue(rl_src1, kFPReg);
-    src_reg1 = rl_src1.reg.GetReg();
     rl_src2 = LoadValue(rl_src2, kFPReg);
-    src_reg2 = rl_src2.reg.GetReg();
   } else {
     rl_src1 = LoadValueWide(rl_src1, kFPReg);
-    src_reg1 = S2d(rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg());
     rl_src2 = LoadValueWide(rl_src2, kFPReg);
-    src_reg2 = S2d(rl_src2.reg.GetLowReg(), rl_src2.reg.GetHighReg());
   }
   // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
   ClobberSReg(rl_dest.s_reg_low);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   LoadConstantNoClobber(rl_result.reg, unordered_gt ? 1 : 0);
   if (single) {
-    NewLIR2(kX86UcomissRR, src_reg1, src_reg2);
+    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
   } else {
-    NewLIR2(kX86UcomisdRR, src_reg1, src_reg2);
+    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
   }
   LIR* branch = NULL;
   if (unordered_gt) {
     branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
   }
   // If the result reg can't be byte accessed, use a jump and move instead of a set.
-  if (rl_result.reg.GetReg() >= 4) {
+  if (rl_result.reg.GetReg() >= rs_rX86_SP.GetReg()) {
     LIR* branch2 = NULL;
     if (unordered_gt) {
       branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA);
@@ -363,8 +367,7 @@
     rl_src2 = mir_graph_->GetSrcWide(mir, 2);
     rl_src1 = LoadValueWide(rl_src1, kFPReg);
     rl_src2 = LoadValueWide(rl_src2, kFPReg);
-    NewLIR2(kX86UcomisdRR, S2d(rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg()),
-            S2d(rl_src2.reg.GetLowReg(), rl_src2.reg.GetHighReg()));
+    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
   } else {
     rl_src1 = mir_graph_->GetSrc(mir, 0);
     rl_src2 = mir_graph_->GetSrc(mir, 1);
@@ -442,8 +445,7 @@
   RegLocation rl_dest = InlineTargetWide(info);  // double place for result
   rl_src = LoadValueWide(rl_src, kFPReg);
   RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
-  NewLIR2(kX86SqrtsdRR, S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()),
-          S2d(rl_src.reg.GetLowReg(), rl_src.reg.GetHighReg()));
+  NewLIR2(kX86SqrtsdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
   StoreValueWide(rl_dest, rl_result);
   return true;
 }
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 96c4cbe..48bff6e 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -33,18 +33,18 @@
                             RegLocation rl_src2) {
   FlushAllRegs();
   LockCallTemps();  // Prepare for explicit register usage
-  RegStorage r_tmp1(RegStorage::k64BitPair, r0, r1);
-  RegStorage r_tmp2(RegStorage::k64BitPair, r2, r3);
+  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
+  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
   LoadValueDirectWideFixed(rl_src1, r_tmp1);
   LoadValueDirectWideFixed(rl_src2, r_tmp2);
   // Compute (r1:r0) = (r1:r0) - (r3:r2)
   OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
   OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
-  NewLIR2(kX86Set8R, r2, kX86CondL);  // r2 = (r1:r0) < (r3:r2) ? 1 : 0
-  NewLIR2(kX86Movzx8RR, r2, r2);
+  NewLIR2(kX86Set8R, rs_r2.GetReg(), kX86CondL);  // r2 = (r1:r0) < (r3:r2) ? 1 : 0
+  NewLIR2(kX86Movzx8RR, rs_r2.GetReg(), rs_r2.GetReg());
   OpReg(kOpNeg, rs_r2);         // r2 = -r2
   OpRegReg(kOpOr, rs_r0, rs_r1);   // r0 = high | low - sets ZF
-  NewLIR2(kX86Set8R, r0, kX86CondNz);  // r0 = (r1:r0) != (r3:r2) ? 1 : 0
+  NewLIR2(kX86Set8R, rs_r0.GetReg(), kX86CondNz);  // r0 = (r1:r0) != (r3:r2) ? 1 : 0
   NewLIR2(kX86Movzx8RR, r0, r0);
   OpRegReg(kOpOr, rs_r0, rs_r2);   // r0 = r0 | r2
   RegLocation rl_result = LocCReturn();
@@ -106,7 +106,7 @@
   if (r_src.IsPair()) {
     r_src = r_src.GetLow();
   }
-  if (X86_FPREG(r_dest.GetReg()) || X86_FPREG(r_src.GetReg()))
+  if (r_dest.IsFloat() || r_src.IsFloat())
     return OpFpRegCopy(r_dest, r_src);
   LIR* res = RawLIR(current_dalvik_offset_, kX86Mov32RR,
                     r_dest.GetReg(), r_src.GetReg());
@@ -125,31 +125,30 @@
 
 void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
   if (r_dest != r_src) {
-    // FIXME: handle k64BitSolo when we start using them.
-    DCHECK(r_dest.IsPair());
-    DCHECK(r_src.IsPair());
-    bool dest_fp = X86_FPREG(r_dest.GetLowReg());
-    bool src_fp = X86_FPREG(r_src.GetLowReg());
+    bool dest_fp = r_dest.IsFloat();
+    bool src_fp = r_src.IsFloat();
     if (dest_fp) {
       if (src_fp) {
-        // TODO: we ought to handle this case here - reserve OpRegCopy for 32-bit copies.
-        OpRegCopy(RegStorage::Solo64(S2d(r_dest.GetLowReg(), r_dest.GetHighReg())),
-                  RegStorage::Solo64(S2d(r_src.GetLowReg(), r_src.GetHighReg())));
+        OpRegCopy(r_dest, r_src);
       } else {
         // TODO: Prevent this from happening in the code. The result is often
         // unused or could have been loaded more easily from memory.
-        NewLIR2(kX86MovdxrRR, r_dest.GetLowReg(), r_src.GetLowReg());
+        NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg());
         RegStorage r_tmp = AllocTempDouble();
-        NewLIR2(kX86MovdxrRR, r_tmp.GetLowReg(), r_src.GetHighReg());
-        NewLIR2(kX86PunpckldqRR, r_dest.GetLowReg(), r_tmp.GetLowReg());
+        NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg());
+        NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg());
         FreeTemp(r_tmp);
       }
     } else {
       if (src_fp) {
-        NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetLowReg());
-        NewLIR2(kX86PsrlqRI, r_src.GetLowReg(), 32);
-        NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), r_src.GetLowReg());
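+        // Extract the low word with movd, then shift a scratch XMM copy right by 32 so the
+        // high word can be pulled out without disturbing the source register.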
+        NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
+        RegStorage temp_reg = AllocTempDouble();
+        NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
+        NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
+        NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
       } else {
+        DCHECK(r_dest.IsPair());
+        DCHECK(r_src.IsPair());
         // Handle overlap
         if (r_src.GetHighReg() == r_dest.GetLowReg() && r_src.GetLowReg() == r_dest.GetHighReg()) {
           // Deal with cycles.
@@ -289,8 +288,8 @@
 
   FlushAllRegs();
   LockCallTemps();  // Prepare for explicit register usage
-  RegStorage r_tmp1(RegStorage::k64BitPair, r0, r1);
-  RegStorage r_tmp2(RegStorage::k64BitPair, r2, r3);
+  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
+  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
   LoadValueDirectWideFixed(rl_src1, r_tmp1);
   LoadValueDirectWideFixed(rl_src2, r_tmp2);
   // Swap operands and condition code to prevent use of zero flag.
@@ -328,49 +327,60 @@
   int32_t val_lo = Low32Bits(val);
   int32_t val_hi = High32Bits(val);
   LIR* taken = &block_label_list_[bb->taken];
-  LIR* not_taken = &block_label_list_[bb->fall_through];
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  bool is_equality_test = ccode == kCondEq || ccode == kCondNe;
+  if (is_equality_test && val != 0) {
+    rl_src1 = ForceTempWide(rl_src1);
+  }
   RegStorage low_reg = rl_src1.reg.GetLow();
   RegStorage high_reg = rl_src1.reg.GetHigh();
 
-  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
-    RegStorage t_reg = AllocTemp();
-    OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
-    FreeTemp(t_reg);
-    OpCondBranch(ccode, taken);
-    return;
+  if (is_equality_test) {
+    // We can simplify the comparison when testing ==, != against 0.
+    if (val == 0) {
+      if (IsTemp(low_reg)) {
+        OpRegReg(kOpOr, low_reg, high_reg);
+        // We have now changed it; ignore the old values.
+        Clobber(rl_src1.reg);
+      } else {
+        RegStorage t_reg = AllocTemp();
+        OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
+        FreeTemp(t_reg);
+      }
+      OpCondBranch(ccode, taken);
+      return;
+    }
+
+    // For ==, != against a non-zero value, compute the actual 64-bit difference.
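+    // sub/sbb produces that difference; OR-ing its two halves then leaves ZF set exactly when
+    // the operands were equal, which is all Eq/Ne needs.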
+    OpRegImm(kOpSub, low_reg, val_lo);
+    NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
+    OpRegReg(kOpOr, high_reg, low_reg);
+    Clobber(rl_src1.reg);
+  } else if (ccode == kCondLe || ccode == kCondGt) {
+    // Swap operands and condition code to prevent use of zero flag.
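+    // val - src is computed instead of src - val, so src <= val becomes val - src >= 0 (kCondGe)
+    // and src > val becomes val - src < 0 (kCondLt); both are decided from SF/OF alone.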
+    RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
+    LoadConstantWide(tmp, val);
+    OpRegReg(kOpSub, tmp.GetLow(), low_reg);
+    OpRegReg(kOpSbc, tmp.GetHigh(), high_reg);
+    ccode = (ccode == kCondLe) ? kCondGe : kCondLt;
+    FreeTemp(tmp);
+  } else {
+    // We can use a compare for the low word to set CF.
+    OpRegImm(kOpCmp, low_reg, val_lo);
+    if (IsTemp(high_reg)) {
+      NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
+      // We have now changed it; ignore the old values.
+      Clobber(rl_src1.reg);
+    } else {
+      // mov temp_reg, high_reg; sbb temp_reg, high_constant
+      RegStorage t_reg = AllocTemp();
+      OpRegCopy(t_reg, high_reg);
+      NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi);
+      FreeTemp(t_reg);
+    }
   }
 
-  OpRegImm(kOpCmp, high_reg, val_hi);
-  switch (ccode) {
-    case kCondEq:
-    case kCondNe:
-      OpCondBranch(kCondNe, (ccode == kCondEq) ? not_taken : taken);
-      break;
-    case kCondLt:
-      OpCondBranch(kCondLt, taken);
-      OpCondBranch(kCondGt, not_taken);
-      ccode = kCondUlt;
-      break;
-    case kCondLe:
-      OpCondBranch(kCondLt, taken);
-      OpCondBranch(kCondGt, not_taken);
-      ccode = kCondLs;
-      break;
-    case kCondGt:
-      OpCondBranch(kCondGt, taken);
-      OpCondBranch(kCondLt, not_taken);
-      ccode = kCondHi;
-      break;
-    case kCondGe:
-      OpCondBranch(kCondGt, taken);
-      OpCondBranch(kCondLt, not_taken);
-      ccode = kCondUge;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected ccode: " << ccode;
-  }
-  OpCmpImmBranch(ccode, low_reg, val_lo, taken);
+  OpCondBranch(ccode, taken);
 }
 
 void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) {
@@ -452,8 +462,7 @@
   LockCallTemps();  // Prepare for explicit register usage.
 
   // Assume that the result will be in EDX.
-  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, rs_r2,
-                           INVALID_SREG, INVALID_SREG};
+  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r2, INVALID_SREG, INVALID_SREG};
 
   // handle div/rem by 1 special case.
   if (imm == 1) {
@@ -516,7 +525,7 @@
       // We will need the value later.
       if (rl_src.location == kLocPhysReg) {
         // We can use it directly.
-        DCHECK(rl_src.reg.GetReg() != r0 && rl_src.reg.GetReg() != r2);
+        DCHECK(rl_src.reg.GetReg() != rs_r0.GetReg() && rl_src.reg.GetReg() != rs_r2.GetReg());
         numerator_reg = rl_src.reg;
       } else {
         numerator_reg = rs_r1;
@@ -532,21 +541,21 @@
     LoadConstantNoClobber(rs_r2, magic);
 
     // EDX:EAX = magic * dividend.
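     // For instance, dividing by 7 uses the standard magic constant 0x92492493 with shift 2;
     // since that magic is negative while the divisor is positive, the numerator is added back
     // into EDX below before the arithmetic shift and sign-bit correction.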
-    NewLIR1(kX86Imul32DaR, r2);
+    NewLIR1(kX86Imul32DaR, rs_r2.GetReg());
 
     if (imm > 0 && magic < 0) {
       // Add numerator to EDX.
       DCHECK(numerator_reg.Valid());
-      NewLIR2(kX86Add32RR, r2, numerator_reg.GetReg());
+      NewLIR2(kX86Add32RR, rs_r2.GetReg(), numerator_reg.GetReg());
     } else if (imm < 0 && magic > 0) {
       DCHECK(numerator_reg.Valid());
-      NewLIR2(kX86Sub32RR, r2, numerator_reg.GetReg());
+      NewLIR2(kX86Sub32RR, rs_r2.GetReg(), numerator_reg.GetReg());
     }
 
     // Do we need the shift?
     if (shift != 0) {
       // Shift EDX by 'shift' bits.
-      NewLIR2(kX86Sar32RI, r2, shift);
+      NewLIR2(kX86Sar32RI, rs_r2.GetReg(), shift);
     }
 
     // Add 1 to EDX if EDX < 0.
@@ -555,10 +564,10 @@
     OpRegCopy(rs_r0, rs_r2);
 
     // Move sign bit to bit 0, zeroing the rest.
-    NewLIR2(kX86Shr32RI, r2, 31);
+    NewLIR2(kX86Shr32RI, rs_r2.GetReg(), 31);
 
     // EDX = EDX + EAX.
-    NewLIR2(kX86Add32RR, r2, r0);
+    NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r0.GetReg());
 
     // Quotient is in EDX.
     if (!is_div) {
@@ -571,7 +580,7 @@
       OpRegRegImm(kOpMul, rs_r2, rs_r2, imm);
 
       // EDX -= EAX.
-      NewLIR2(kX86Sub32RR, r0, r2);
+      NewLIR2(kX86Sub32RR, rs_r0.GetReg(), rs_r2.GetReg());
 
       // For this case, return the result in EAX.
       rl_result.reg.SetReg(r0);
@@ -625,12 +634,11 @@
   // Expected case.
   minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
   minint_branch->target = minus_one_branch->target;
-  NewLIR1(kX86Idivmod32DaR, r1);
+  NewLIR1(kX86Idivmod32DaR, rs_r1.GetReg());
   done->target = NewLIR0(kPseudoTargetLabel);
 
   // Result is in EAX for div and EDX for rem.
-  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, rs_r0,
-                           INVALID_SREG, INVALID_SREG};
+  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG};
   if (!is_div) {
     rl_result.reg.SetReg(r2);
   }
@@ -682,14 +690,12 @@
   RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
   RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  // Unaligned access is allowed on x86.
+  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size);
   if (size == k64) {
-    // Unaligned access is allowed on x86.
-    LoadBaseDispWide(rl_address.reg, 0, rl_result.reg, INVALID_SREG);
     StoreValueWide(rl_dest, rl_result);
   } else {
     DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
-    // Unaligned access is allowed on x86.
-    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
     StoreValue(rl_dest, rl_result);
   }
   return true;
@@ -703,7 +709,7 @@
   if (size == k64) {
     // Unaligned access is allowed on x86.
     RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
-    StoreBaseDispWide(rl_address.reg, 0, rl_value.reg);
+    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size);
   } else {
     DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
     // Unaligned access is allowed on x86.
@@ -718,6 +724,12 @@
 }
 
 void X86Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
+  DCHECK_EQ(kX86, cu_->instruction_set);
+  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
+}
+
+void X86Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
+  DCHECK_EQ(kX86_64, cu_->instruction_set);
   NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
 }
 
@@ -741,16 +753,16 @@
     // TODO: CFI support.
     FlushAllRegs();
     LockCallTemps();
-    RegStorage r_tmp1(RegStorage::k64BitPair, rAX, rDX);
-    RegStorage r_tmp2(RegStorage::k64BitPair, rBX, rCX);
+    RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
+    RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX);
     LoadValueDirectWideFixed(rl_src_expected, r_tmp1);
     LoadValueDirectWideFixed(rl_src_new_value, r_tmp2);
-    NewLIR1(kX86Push32R, rDI);
-    MarkTemp(rDI);
-    LockTemp(rDI);
-    NewLIR1(kX86Push32R, rSI);
-    MarkTemp(rSI);
-    LockTemp(rSI);
+    NewLIR1(kX86Push32R, rs_rDI.GetReg());
+    MarkTemp(rs_rDI);
+    LockTemp(rs_rDI);
+    NewLIR1(kX86Push32R, rs_rSI.GetReg());
+    MarkTemp(rs_rSI);
+    LockTemp(rs_rSI);
     const int push_offset = 4 /* push edi */ + 4 /* push esi */;
     int srcObjSp = IsInReg(this, rl_src_obj, rs_rSI) ? 0
                 : (IsInReg(this, rl_src_obj, rs_rDI) ? 4
@@ -761,22 +773,23 @@
                    : (IsInReg(this, rl_src_offset, rs_rDI) ? 4
                    : (SRegOffset(rl_src_offset.s_reg_low) + push_offset));
     LoadWordDisp(TargetReg(kSp), srcOffsetSp, rs_rSI);
-    NewLIR4(kX86LockCmpxchg8bA, rDI, rSI, 0, 0);
+    NewLIR4(kX86LockCmpxchg8bA, rs_rDI.GetReg(), rs_rSI.GetReg(), 0, 0);
 
     // After a store we need to insert barrier in case of potential load. Since the
     // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
     GenMemBarrier(kStoreLoad);
 
-    FreeTemp(rSI);
-    UnmarkTemp(rSI);
-    NewLIR1(kX86Pop32R, rSI);
-    FreeTemp(rDI);
-    UnmarkTemp(rDI);
-    NewLIR1(kX86Pop32R, rDI);
+    FreeTemp(rs_rSI);
+    UnmarkTemp(rs_rSI);
+    NewLIR1(kX86Pop32R, rs_rSI.GetReg());
+    FreeTemp(rs_rDI);
+    UnmarkTemp(rs_rDI);
+    NewLIR1(kX86Pop32R, rs_rDI.GetReg());
     FreeCallTemps();
   } else {
     // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX.
     FlushReg(rs_r0);
+    Clobber(rs_r0);
     LockTemp(rs_r0);
 
     RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
@@ -784,9 +797,9 @@
 
     if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
       // Mark card for object assuming new value is stored.
-      FreeTemp(r0);  // Temporarily release EAX for MarkGCCard().
+      FreeTemp(rs_r0);  // Temporarily release EAX for MarkGCCard().
       MarkGCCard(rl_new_value.reg, rl_object.reg);
-      LockTemp(r0);
+      LockTemp(rs_r0);
     }
 
     RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
@@ -797,7 +810,7 @@
     // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
     GenMemBarrier(kStoreLoad);
 
-    FreeTemp(r0);
+    FreeTemp(rs_r0);
   }
 
   // Convert ZF to boolean
@@ -896,8 +909,13 @@
       }
       // Load array length to kArg1.
       m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1), array_base_, len_offset_);
-      m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
-                                    new_index, m2l_->TargetReg(kArg1), true);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
+                                      new_index, m2l_->TargetReg(kArg1), true);
+      } else {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
+                                      new_index, m2l_->TargetReg(kArg1), true);
+      }
     }
 
    private:
@@ -931,8 +949,13 @@
       // Load array length to kArg1.
       m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1), array_base_, len_offset_);
       m2l_->LoadConstant(m2l_->TargetReg(kArg0), index_);
-      m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
-                                    m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
+                                      m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
+      } else {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
+                                      m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
+      }
     }
 
    private:
@@ -949,7 +972,11 @@
 
 // Test suspend flag, return target of taken suspend branch
 LIR* X86Mir2Lir::OpTestSuspend(LIR* target) {
-  OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0);
+  if (Is64BitInstructionSet(cu_->instruction_set)) {
+    OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0);
+  } else {
+    OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0);
+  }
   return OpCondBranch((target == NULL) ? kCondNe : kCondEq, target);
 }
 
@@ -1000,11 +1027,11 @@
       NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
       break;
     case 1:
-      LoadBaseDisp(rs_rX86_SP, displacement, dest, k32, sreg);
+      LoadBaseDisp(rs_rX86_SP, displacement, dest, k32);
       break;
     default:
-      m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(), rX86_SP,
-                  displacement, val);
+      m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(),
+                  rs_rX86_SP.GetReg(), displacement, val);
       AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
       break;
   }
@@ -1047,7 +1074,7 @@
     int32_t val_hi = High32Bits(val);
     FlushAllRegs();
     LockCallTemps();  // Prepare for explicit register usage.
-    rl_src1 = UpdateLocWide(rl_src1);
+    rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
     bool src1_in_reg = rl_src1.location == kLocPhysReg;
     int displacement = SRegOffset(rl_src1.s_reg_low);
 
@@ -1062,7 +1089,7 @@
     }
 
     // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
-    NewLIR2(kX86Add32RR, r1, r0);
+    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
 
     // EAX <- 2L
     LoadConstantNoClobber(rs_r0, val_lo);
@@ -1071,18 +1098,17 @@
     if (src1_in_reg) {
       NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
     } else {
-      LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET);
+      LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET);
       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                               true /* is_load */, true /* is_64bit */);
     }
 
     // EDX <- EDX + ECX (add high words)
-    NewLIR2(kX86Add32RR, r2, r1);
+    NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
 
     // Result is EDX:EAX
-    RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
-                             RegStorage::MakeRegPair(rs_r0, rs_r2),
-                             INVALID_SREG, INVALID_SREG};
+    RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
+                             RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
     StoreValueWide(rl_dest, rl_result);
     return;
   }
@@ -1094,8 +1120,8 @@
 
   FlushAllRegs();
   LockCallTemps();  // Prepare for explicit register usage.
-  rl_src1 = UpdateLocWide(rl_src1);
-  rl_src2 = UpdateLocWide(rl_src2);
+  rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
+  rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
 
   // At this point, the VRs are in their home locations.
   bool src1_in_reg = rl_src1.location == kLocPhysReg;
@@ -1103,65 +1129,65 @@
 
   // ECX <- 1H
   if (src1_in_reg) {
-    NewLIR2(kX86Mov32RR, r1, rl_src1.reg.GetHighReg());
+    NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg());
   } else {
-    LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1,
-                 k32, GetSRegHi(rl_src1.s_reg_low));
+    LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32);
   }
 
   if (is_square) {
     // Take advantage of the fact that the values are the same.
     // ECX <- ECX * 2L  (1H * 2L)
     if (src2_in_reg) {
-      NewLIR2(kX86Imul32RR, r1, rl_src2.reg.GetLowReg());
+      NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
     } else {
       int displacement = SRegOffset(rl_src2.s_reg_low);
-      LIR *m = NewLIR3(kX86Imul32RM, r1, rX86_SP, displacement + LOWORD_OFFSET);
+      LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
+                       displacement + LOWORD_OFFSET);
       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                               true /* is_load */, true /* is_64bit */);
     }
 
     // ECX <- 2*ECX (2H * 1L) + (1H * 2L)
-    NewLIR2(kX86Add32RR, r1, r1);
+    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r1.GetReg());
   } else {
     // EAX <- 2H
     if (src2_in_reg) {
-      NewLIR2(kX86Mov32RR, r0, rl_src2.reg.GetHighReg());
+      NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg());
     } else {
-      LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0,
-                   k32, GetSRegHi(rl_src2.s_reg_low));
+      LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32);
     }
 
     // EAX <- EAX * 1L  (2H * 1L)
     if (src1_in_reg) {
-      NewLIR2(kX86Imul32RR, r0, rl_src1.reg.GetLowReg());
+      NewLIR2(kX86Imul32RR, rs_r0.GetReg(), rl_src1.reg.GetLowReg());
     } else {
       int displacement = SRegOffset(rl_src1.s_reg_low);
-      LIR *m = NewLIR3(kX86Imul32RM, r0, rX86_SP, displacement + LOWORD_OFFSET);
+      LIR *m = NewLIR3(kX86Imul32RM, rs_r0.GetReg(), rs_rX86_SP.GetReg(),
+                       displacement + LOWORD_OFFSET);
       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                               true /* is_load */, true /* is_64bit */);
     }
 
     // ECX <- ECX * 2L  (1H * 2L)
     if (src2_in_reg) {
-      NewLIR2(kX86Imul32RR, r1, rl_src2.reg.GetLowReg());
+      NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
     } else {
       int displacement = SRegOffset(rl_src2.s_reg_low);
-      LIR *m = NewLIR3(kX86Imul32RM, r1, rX86_SP, displacement + LOWORD_OFFSET);
+      LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
+                       displacement + LOWORD_OFFSET);
       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                               true /* is_load */, true /* is_64bit */);
     }
 
     // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
-    NewLIR2(kX86Add32RR, r1, r0);
+    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
   }
 
   // EAX <- 2L
   if (src2_in_reg) {
-    NewLIR2(kX86Mov32RR, r0, rl_src2.reg.GetLowReg());
+    NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg());
   } else {
-    LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0,
-                 k32, rl_src2.s_reg_low);
+    LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32);
   }
 
   // EDX:EAX <- 2L * 1L (double precision)
@@ -1169,16 +1195,16 @@
     NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
   } else {
     int displacement = SRegOffset(rl_src1.s_reg_low);
-    LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET);
+    LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET);
     AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                             true /* is_load */, true /* is_64bit */);
   }
 
   // EDX <- EDX + ECX (add high words)
-  NewLIR2(kX86Add32RR, r2, r1);
+  NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
 
   // Result is EDX:EAX
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
+  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
                            RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
   StoreValueWide(rl_dest, rl_result);
 }
@@ -1190,7 +1216,7 @@
   if (rl_src.location == kLocPhysReg) {
     // Both operands are in registers.
     // But we must ensure that rl_src is in pair
-    rl_src = EvalLocWide(rl_src, kCoreReg, true);
+    rl_src = LoadValueWide(rl_src, kCoreReg);
     if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
       // The registers are the same, so we would clobber it before the use.
       RegStorage temp_reg = AllocTemp();
@@ -1221,12 +1247,12 @@
 }
 
 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
-  rl_dest = UpdateLocWide(rl_dest);
+  rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
   if (rl_dest.location == kLocPhysReg) {
     // Ensure we are in a register pair
     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
 
-    rl_src = UpdateLocWide(rl_src);
+    rl_src = UpdateLocWideTyped(rl_src, kCoreReg);
     GenLongRegOrMemOp(rl_result, rl_src, op);
     StoreFinalValueWide(rl_dest, rl_result);
     return;
@@ -1266,8 +1292,12 @@
     case Instruction::AND_LONG_2ADDR:
     case Instruction::OR_LONG_2ADDR:
     case Instruction::XOR_LONG_2ADDR:
-      GenLongArith(rl_dest, rl_src2, op);
-      return;
+      if (GenerateTwoOperandInstructions()) {
+        GenLongArith(rl_dest, rl_src2, op);
+        return;
+      }
+      break;
+
     default:
       break;
   }
@@ -1279,7 +1309,7 @@
     rl_result = ForceTempWide(rl_result);
 
     // Perform the operation using the RHS.
-    rl_src2 = UpdateLocWide(rl_src2);
+    rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
     GenLongRegOrMemOp(rl_result, rl_src2, op);
 
     // And now record that the result is in the temp.
@@ -1290,17 +1320,17 @@
   // It wasn't in registers, so it better be in memory.
   DCHECK((rl_dest.location == kLocDalvikFrame) ||
          (rl_dest.location == kLocCompilerTemp));
-  rl_src1 = UpdateLocWide(rl_src1);
-  rl_src2 = UpdateLocWide(rl_src2);
+  rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
+  rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
 
   // Get one of the source operands into temporary register.
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  if (IsTemp(rl_src1.reg.GetLowReg()) && IsTemp(rl_src1.reg.GetHighReg())) {
+  if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
     GenLongRegOrMemOp(rl_src1, rl_src2, op);
   } else if (is_commutative) {
     rl_src2 = LoadValueWide(rl_src2, kCoreReg);
     // We need at least one of them to be a temporary.
-    if (!(IsTemp(rl_src2.reg.GetLowReg()) && IsTemp(rl_src2.reg.GetHighReg()))) {
+    if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
       rl_src1 = ForceTempWide(rl_src1);
       GenLongRegOrMemOp(rl_src1, rl_src2, op);
     } else {
@@ -1342,6 +1372,15 @@
   GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
 }
 
+void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
+  LOG(FATAL) << "Unexpected use of GenNotLong()";
+}
+
+void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
+                               RegLocation rl_src2, bool is_div) {
+  LOG(FATAL) << "Unexpected use of GenDivRemLong()";
+}
+
 void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
   rl_src = LoadValueWide(rl_src, kCoreReg);
   RegLocation rl_result = ForceTempWide(rl_src);
@@ -1358,7 +1397,8 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-void X86Mir2Lir::OpRegThreadMem(OpKind op, int r_dest, ThreadOffset<4> thread_offset) {
+void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset) {
+  DCHECK_EQ(kX86, cu_->instruction_set);
   X86OpCode opcode = kX86Bkpt;
   switch (op) {
   case kOpCmp: opcode = kX86Cmp32RT;  break;
@@ -1367,7 +1407,30 @@
     LOG(FATAL) << "Bad opcode: " << op;
     break;
   }
-  NewLIR2(opcode, r_dest, thread_offset.Int32Value());
+  NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
+}
+
+void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) {
+  DCHECK_EQ(kX86_64, cu_->instruction_set);
+  X86OpCode opcode = kX86Bkpt;
+  if (Gen64Bit() && r_dest.Is64BitSolo()) {
+    switch (op) {
+    case kOpCmp: opcode = kX86Cmp64RT;  break;
+    case kOpMov: opcode = kX86Mov64RT;  break;
+    default:
+      LOG(FATAL) << "Bad opcode(OpRegThreadMem 64): " << op;
+      break;
+    }
+  } else {
+    switch (op) {
+    case kOpCmp: opcode = kX86Cmp32RT;  break;
+    case kOpMov: opcode = kX86Mov32RT;  break;
+    default:
+      LOG(FATAL) << "Bad opcode: " << op;
+      break;
+    }
+  }
+  NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
 }
 
 /*
@@ -1375,7 +1438,7 @@
  */
 void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_dest, int scale) {
-  RegisterClass reg_class = oat_reg_class_by_size(size);
+  RegisterClass reg_class = RegClassBySize(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   RegLocation rl_result;
   rl_array = LoadValue(rl_array, kCoreReg);
@@ -1410,13 +1473,10 @@
     }
   }
   rl_result = EvalLoc(rl_dest, reg_class, true);
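+  // One LoadBaseIndexedDisp covers both narrow and wide loads; only the store-back differs.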
+  LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size);
   if ((size == k64) || (size == kDouble)) {
-    LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg.GetLow(),
-                        rl_result.reg.GetHigh(), size, INVALID_SREG);
     StoreValueWide(rl_dest, rl_result);
   } else {
-    LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg,
-                        RegStorage::InvalidReg(), size, INVALID_SREG);
     StoreValue(rl_dest, rl_result);
   }
 }
@@ -1427,7 +1487,7 @@
  */
 void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
-  RegisterClass reg_class = oat_reg_class_by_size(size);
+  RegisterClass reg_class = RegClassBySize(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
 
@@ -1466,24 +1526,18 @@
     rl_src = LoadValue(rl_src, reg_class);
   }
   // If the src reg can't be byte accessed, move it to a temp first.
-  if ((size == kSignedByte || size == kUnsignedByte) && rl_src.reg.GetReg() >= 4) {
+  if ((size == kSignedByte || size == kUnsignedByte) &&
+      rl_src.reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) {
     RegStorage temp = AllocTemp();
     OpRegCopy(temp, rl_src.reg);
-    StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp,
-                         RegStorage::InvalidReg(), size, INVALID_SREG);
+    StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size);
   } else {
-    if (rl_src.wide) {
-      StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg.GetLow(),
-                           rl_src.reg.GetHigh(), size, INVALID_SREG);
-    } else {
-      StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg,
-                           RegStorage::InvalidReg(), size, INVALID_SREG);
-    }
+    StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size);
   }
   if (card_mark) {
     // Free rl_index if its a temp. Ensures there are 2 free regs for card mark.
     if (!constant_index) {
-      FreeTemp(rl_index.reg.GetReg());
+      FreeTemp(rl_index.reg);
     }
     MarkGCCard(rl_src.reg, rl_array.reg);
   }
@@ -1491,7 +1545,7 @@
 
 RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                           RegLocation rl_src, int shift_amount) {
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   switch (opcode) {
     case Instruction::SHL_LONG:
     case Instruction::SHL_LONG_2ADDR:
@@ -1501,7 +1555,6 @@
         LoadConstant(rl_result.reg.GetLow(), 0);
       } else if (shift_amount > 31) {
         OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
-        FreeTemp(rl_src.reg.GetHighReg());
         NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
         LoadConstant(rl_result.reg.GetLow(), 0);
       } else {
@@ -1601,7 +1654,11 @@
     case Instruction::XOR_LONG_2ADDR:
     case Instruction::AND_LONG_2ADDR:
       if (rl_src2.is_const) {
-        GenLongImm(rl_dest, rl_src2, opcode);
+        if (GenerateTwoOperandInstructions()) {
+          GenLongImm(rl_dest, rl_src2, opcode);
+        } else {
+          GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
+        }
       } else {
         DCHECK(rl_src1.is_const);
         GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
@@ -1679,7 +1736,7 @@
                                 int32_t value) {
   bool in_mem = loc.location != kLocPhysReg;
   bool byte_imm = IS_SIMM8(value);
-  DCHECK(in_mem || !IsFpReg(loc.reg));
+  DCHECK(in_mem || !loc.reg.IsFloat());
   switch (op) {
     case Instruction::ADD_LONG:
     case Instruction::ADD_LONG_2ADDR:
@@ -1734,7 +1791,7 @@
   int64_t val = mir_graph_->ConstantValueWide(rl_src);
   int32_t val_lo = Low32Bits(val);
   int32_t val_hi = High32Bits(val);
-  rl_dest = UpdateLocWide(rl_dest);
+  rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
 
   // Can we just do this into memory?
   if ((rl_dest.location == kLocDalvikFrame) ||
@@ -1763,7 +1820,7 @@
 
   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   DCHECK_EQ(rl_result.location, kLocPhysReg);
-  DCHECK(!IsFpReg(rl_result.reg));
+  DCHECK(!rl_result.reg.IsFloat());
 
   if (!IsNoOp(op, val_lo)) {
     X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
@@ -1782,14 +1839,13 @@
   int64_t val = mir_graph_->ConstantValueWide(rl_src2);
   int32_t val_lo = Low32Bits(val);
   int32_t val_hi = High32Bits(val);
-  rl_dest = UpdateLocWide(rl_dest);
-  rl_src1 = UpdateLocWide(rl_src1);
+  rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
+  rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
 
   // Can we do this directly into the destination registers?
   if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
       rl_dest.reg.GetLowReg() == rl_src1.reg.GetLowReg() &&
-      rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() &&
-      !IsFpReg(rl_dest.reg)) {
+      rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() && !rl_dest.reg.IsFloat()) {
     if (!IsNoOp(op, val_lo)) {
       X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
       NewLIR2(x86op, rl_dest.reg.GetLowReg(), val_lo);
@@ -1829,9 +1885,9 @@
   RegStorage result_reg = rl_result.reg;
 
   // SETcc only works with EAX..EDX.
-  if (result_reg == object.reg || result_reg.GetReg() >= 4) {
-    result_reg = AllocTypedTemp(false, kCoreReg);
-    DCHECK_LT(result_reg.GetReg(), 4);
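+  // rs_rX86_SP's register number (4) marks the first register with no byte-addressable form.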
+  if (result_reg == object.reg || result_reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) {
+    result_reg = AllocateByteRegister();
+    DCHECK_LT(result_reg.GetRegNum(), rs_rX86_SP.GetRegNum());
   }
 
   // Assume that there is no match.
@@ -1842,8 +1898,8 @@
 
   // If Method* is already in a register, we can save a copy.
   RegLocation rl_method = mir_graph_->GetMethodLoc();
-  int32_t offset_of_type = mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() +
-    (sizeof(mirror::Class*) * type_idx);
+  int32_t offset_of_type =
+      mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
+      (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
 
   if (rl_method.location == kLocPhysReg) {
     if (use_declaring_class) {
@@ -1897,8 +1953,13 @@
   if (needs_access_check) {
     // Check we have access to type_idx and if not throw IllegalAccessError,
     // Caller function returns Class* in kArg0.
-    CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
-                         type_idx, true);
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess),
+                           type_idx, true);
+    } else {
+      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
+                           type_idx, true);
+    }
     OpRegCopy(class_reg, TargetReg(kRet0));
     LoadValueDirectFixed(rl_src, TargetReg(kArg0));
   } else if (use_declaring_class) {
@@ -1911,14 +1972,18 @@
     LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                  class_reg);
     int32_t offset_of_type =
-        mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + (sizeof(mirror::Class*)
+        mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
+        (sizeof(mirror::HeapReference<mirror::Class*>)
         * type_idx);
     LoadRefDisp(class_reg, offset_of_type, class_reg);
     if (!can_assume_type_is_in_dex_cache) {
       // Need to test presence of type in dex cache at runtime.
       LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
       // Type is not resolved. Call out to helper, which will return resolved type in kRet0/kArg0.
-      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx, true);
+      } else {
+        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true);
+      }
       OpRegCopy(TargetReg(kArg2), TargetReg(kRet0));  // Align usage with fast path.
       LoadValueDirectFixed(rl_src, TargetReg(kArg0));  /* Reload Ref. */
       // Rejoin code paths
@@ -1930,7 +1995,7 @@
   RegLocation rl_result = GetReturn(false);
 
   // SETcc only works with EAX..EDX.
-  DCHECK_LT(rl_result.reg.GetReg(), 4);
+  DCHECK_LT(rl_result.reg.GetRegNum(), 4);
 
   // Is the class NULL?
   LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL);
@@ -1952,7 +2017,11 @@
       branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL);
     }
     OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));
-    OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial));
+    } else {
+      OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
+    }
   }
   // TODO: only clobber when type isn't final?
   ClobberCallerSave();
@@ -2058,12 +2127,16 @@
       LOG(FATAL) << "Invalid word arith op: " << opcode;
   }
 
-    // Can we convert to a two address instruction?
+  // Can we convert to a two address instruction?
   if (!is_two_addr &&
         (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
          mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
-      is_two_addr = true;
-    }
+    is_two_addr = true;
+  }
+
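+  // Force the three-operand path when two-operand (2ADDR) forms are not supported.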
+  if (!GenerateTwoOperandInstructions()) {
+    is_two_addr = false;
+  }
 
   // Get the div/rem stuff out of the way.
   if (is_div_rem) {
@@ -2074,7 +2147,7 @@
 
   if (unary) {
     rl_lhs = LoadValue(rl_lhs, kCoreReg);
-    rl_result = UpdateLoc(rl_dest);
+    rl_result = UpdateLocTyped(rl_dest, kCoreReg);
     rl_result = EvalLoc(rl_dest, kCoreReg, true);
     OpRegReg(op, rl_result.reg, rl_lhs.reg);
   } else {
@@ -2084,14 +2157,14 @@
       LoadValueDirectFixed(rl_rhs, t_reg);
       if (is_two_addr) {
         // Can we do this directly into memory?
-        rl_result = UpdateLoc(rl_dest);
+        rl_result = UpdateLocTyped(rl_dest, kCoreReg);
         rl_rhs = LoadValue(rl_rhs, kCoreReg);
         if (rl_result.location != kLocPhysReg) {
           // Okay, we can do this into memory
           OpMemReg(op, rl_result, t_reg.GetReg());
           FreeTemp(t_reg);
           return;
-        } else if (!IsFpReg(rl_result.reg.GetReg())) {
+        } else if (!rl_result.reg.IsFloat()) {
           // Can do this directly into the result register
           OpRegReg(op, rl_result.reg, t_reg);
           FreeTemp(t_reg);
@@ -2108,28 +2181,32 @@
       // Multiply is 3 operand only (sort of).
       if (is_two_addr && op != kOpMul) {
         // Can we do this directly into memory?
-        rl_result = UpdateLoc(rl_dest);
+        rl_result = UpdateLocTyped(rl_dest, kCoreReg);
         if (rl_result.location == kLocPhysReg) {
           // Ensure res is in a core reg
           rl_result = EvalLoc(rl_dest, kCoreReg, true);
           // Can we do this from memory directly?
-          rl_rhs = UpdateLoc(rl_rhs);
+          rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
           if (rl_rhs.location != kLocPhysReg) {
             OpRegMem(op, rl_result.reg, rl_rhs);
             StoreFinalValue(rl_dest, rl_result);
             return;
-          } else if (!IsFpReg(rl_rhs.reg)) {
+          } else if (!rl_rhs.reg.IsFloat()) {
             OpRegReg(op, rl_result.reg, rl_rhs.reg);
             StoreFinalValue(rl_dest, rl_result);
             return;
           }
         }
         rl_rhs = LoadValue(rl_rhs, kCoreReg);
+        // rl_rhs and rl_dest may be the same VR; in that case rl_dest is now in a register
+        // after LoadValue, but rl_result has not been updated yet, so refresh it here.
+        rl_result = UpdateLocTyped(rl_dest, kCoreReg);
         if (rl_result.location != kLocPhysReg) {
           // Okay, we can do this into memory.
           OpMemReg(op, rl_result, rl_rhs.reg.GetReg());
           return;
-        } else if (!IsFpReg(rl_result.reg)) {
+        } else if (!rl_result.reg.IsFloat()) {
           // Can do this directly into the result register.
           OpRegReg(op, rl_result.reg, rl_rhs.reg);
           StoreFinalValue(rl_dest, rl_result);
@@ -2141,8 +2218,8 @@
         }
       } else {
         // Try to use reg/memory instructions.
-        rl_lhs = UpdateLoc(rl_lhs);
-        rl_rhs = UpdateLoc(rl_rhs);
+        rl_lhs = UpdateLocTyped(rl_lhs, kCoreReg);
+        rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
         // We can't optimize with FP registers.
         if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
           // Something is difficult, so fall back to the standard case.
@@ -2160,6 +2237,8 @@
             if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
               rl_lhs = LoadValue(rl_lhs, kCoreReg);
               rl_result = EvalLoc(rl_dest, kCoreReg, true);
+              // No-op if these are the same.
+              OpRegCopy(rl_result.reg, rl_lhs.reg);
             } else {
               rl_result = EvalLoc(rl_dest, kCoreReg, true);
               LoadValueDirect(rl_lhs, rl_result.reg);
@@ -2195,10 +2274,10 @@
 
 bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) {
   // If we have non-core registers, then we can't do good things.
-  if (rl_lhs.location == kLocPhysReg && IsFpReg(rl_lhs.reg.GetReg())) {
+  if (rl_lhs.location == kLocPhysReg && rl_lhs.reg.IsFloat()) {
     return false;
   }
-  if (rl_rhs.location == kLocPhysReg && IsFpReg(rl_rhs.reg.GetReg())) {
+  if (rl_rhs.location == kLocPhysReg && rl_rhs.reg.IsFloat()) {
     return false;
   }
 
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 3e3fa72..e7a629a 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -26,27 +26,118 @@
 
 namespace art {
 
-// FIXME: restore "static" when usage uncovered
-/*static*/ int core_regs[] = {
-  rAX, rCX, rDX, rBX, rX86_SP, rBP, rSI, rDI
+static const RegStorage core_regs_arr_32[] = {
+    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
+};
+static const RegStorage core_regs_arr_64[] = {
+    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_64, rs_rBP, rs_rSI, rs_rDI,
 #ifdef TARGET_REX_SUPPORT
-  r8, r9, r10, r11, r12, r13, r14, 15
+    rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
 #endif
 };
-/*static*/ int ReservedRegs[] = {rX86_SP};
-/*static*/ int core_temps[] = {rAX, rCX, rDX, rBX};
-/*static*/ int FpRegs[] = {
-  fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7,
+static const RegStorage sp_regs_arr_32[] = {
+    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
+};
+static const RegStorage sp_regs_arr_64[] = {
+    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
 #ifdef TARGET_REX_SUPPORT
-  fr8, fr9, fr10, fr11, fr12, fr13, fr14, fr15
+    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
 #endif
 };
-/*static*/ int fp_temps[] = {
-  fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7,
+static const RegStorage dp_regs_arr_32[] = {
+    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
+};
+static const RegStorage dp_regs_arr_64[] = {
+    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
 #ifdef TARGET_REX_SUPPORT
-  fr8, fr9, fr10, fr11, fr12, fr13, fr14, fr15
+    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
 #endif
 };
+static const RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
+static const RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_64};
+static const RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
+static const RegStorage core_temps_arr_64[] = {
+    rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
+#ifdef TARGET_REX_SUPPORT
+    rs_r8, rs_r9, rs_r10, rs_r11
+#endif
+};
+static const RegStorage sp_temps_arr_32[] = {
+    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
+};
+static const RegStorage sp_temps_arr_64[] = {
+    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
+#ifdef TARGET_REX_SUPPORT
+    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
+#endif
+};
+static const RegStorage dp_temps_arr_32[] = {
+    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
+};
+static const RegStorage dp_temps_arr_64[] = {
+    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
+#ifdef TARGET_REX_SUPPORT
+    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
+#endif
+};
+
+static const std::vector<RegStorage> empty_pool;
+static const std::vector<RegStorage> core_regs_32(core_regs_arr_32,
+    core_regs_arr_32 + sizeof(core_regs_arr_32) / sizeof(core_regs_arr_32[0]));
+static const std::vector<RegStorage> core_regs_64(core_regs_arr_64,
+    core_regs_arr_64 + sizeof(core_regs_arr_64) / sizeof(core_regs_arr_64[0]));
+static const std::vector<RegStorage> sp_regs_32(sp_regs_arr_32,
+    sp_regs_arr_32 + sizeof(sp_regs_arr_32) / sizeof(sp_regs_arr_32[0]));
+static const std::vector<RegStorage> sp_regs_64(sp_regs_arr_64,
+    sp_regs_arr_64 + sizeof(sp_regs_arr_64) / sizeof(sp_regs_arr_64[0]));
+static const std::vector<RegStorage> dp_regs_32(dp_regs_arr_32,
+    dp_regs_arr_32 + sizeof(dp_regs_arr_32) / sizeof(dp_regs_arr_32[0]));
+static const std::vector<RegStorage> dp_regs_64(dp_regs_arr_64,
+    dp_regs_arr_64 + sizeof(dp_regs_arr_64) / sizeof(dp_regs_arr_64[0]));
+static const std::vector<RegStorage> reserved_regs_32(reserved_regs_arr_32,
+    reserved_regs_arr_32 + sizeof(reserved_regs_arr_32) / sizeof(reserved_regs_arr_32[0]));
+static const std::vector<RegStorage> reserved_regs_64(reserved_regs_arr_64,
+    reserved_regs_arr_64 + sizeof(reserved_regs_arr_64) / sizeof(reserved_regs_arr_64[0]));
+static const std::vector<RegStorage> core_temps_32(core_temps_arr_32,
+    core_temps_arr_32 + sizeof(core_temps_arr_32) / sizeof(core_temps_arr_32[0]));
+static const std::vector<RegStorage> core_temps_64(core_temps_arr_64,
+    core_temps_arr_64 + sizeof(core_temps_arr_64) / sizeof(core_temps_arr_64[0]));
+static const std::vector<RegStorage> sp_temps_32(sp_temps_arr_32,
+    sp_temps_arr_32 + sizeof(sp_temps_arr_32) / sizeof(sp_temps_arr_32[0]));
+static const std::vector<RegStorage> sp_temps_64(sp_temps_arr_64,
+    sp_temps_arr_64 + sizeof(sp_temps_arr_64) / sizeof(sp_temps_arr_64[0]));
+static const std::vector<RegStorage> dp_temps_32(dp_temps_arr_32,
+    dp_temps_arr_32 + sizeof(dp_temps_arr_32) / sizeof(dp_temps_arr_32[0]));
+static const std::vector<RegStorage> dp_temps_64(dp_temps_arr_64,
+    dp_temps_arr_64 + sizeof(dp_temps_arr_64) / sizeof(dp_temps_arr_64[0]));
+
+RegStorage rs_rX86_SP;
+
+X86NativeRegisterPool rX86_ARG0;
+X86NativeRegisterPool rX86_ARG1;
+X86NativeRegisterPool rX86_ARG2;
+X86NativeRegisterPool rX86_ARG3;
+X86NativeRegisterPool rX86_FARG0;
+X86NativeRegisterPool rX86_FARG1;
+X86NativeRegisterPool rX86_FARG2;
+X86NativeRegisterPool rX86_FARG3;
+X86NativeRegisterPool rX86_RET0;
+X86NativeRegisterPool rX86_RET1;
+X86NativeRegisterPool rX86_INVOKE_TGT;
+X86NativeRegisterPool rX86_COUNT;
+
+RegStorage rs_rX86_ARG0;
+RegStorage rs_rX86_ARG1;
+RegStorage rs_rX86_ARG2;
+RegStorage rs_rX86_ARG3;
+RegStorage rs_rX86_FARG0;
+RegStorage rs_rX86_FARG1;
+RegStorage rs_rX86_FARG2;
+RegStorage rs_rX86_FARG3;
+RegStorage rs_rX86_RET0;
+RegStorage rs_rX86_RET1;
+RegStorage rs_rX86_INVOKE_TGT;
+RegStorage rs_rX86_COUNT;
 
 RegLocation X86Mir2Lir::LocCReturn() {
   return x86_loc_c_return;
@@ -66,29 +157,29 @@
 
 // Return a target-dependent special register.
 RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
-  int res_reg = RegStorage::kInvalidRegVal;
+  RegStorage res_reg = RegStorage::InvalidReg();
   switch (reg) {
-    case kSelf: res_reg = rX86_SELF; break;
-    case kSuspend: res_reg =  rX86_SUSPEND; break;
-    case kLr: res_reg =  rX86_LR; break;
-    case kPc: res_reg =  rX86_PC; break;
-    case kSp: res_reg =  rX86_SP; break;
-    case kArg0: res_reg = rX86_ARG0; break;
-    case kArg1: res_reg = rX86_ARG1; break;
-    case kArg2: res_reg = rX86_ARG2; break;
-    case kArg3: res_reg = rX86_ARG3; break;
-    case kFArg0: res_reg = rX86_FARG0; break;
-    case kFArg1: res_reg = rX86_FARG1; break;
-    case kFArg2: res_reg = rX86_FARG2; break;
-    case kFArg3: res_reg = rX86_FARG3; break;
-    case kRet0: res_reg = rX86_RET0; break;
-    case kRet1: res_reg = rX86_RET1; break;
-    case kInvokeTgt: res_reg = rX86_INVOKE_TGT; break;
-    case kHiddenArg: res_reg = rAX; break;
-    case kHiddenFpArg: res_reg = fr0; break;
-    case kCount: res_reg = rX86_COUNT; break;
+    case kSelf: res_reg = RegStorage::InvalidReg(); break;
+    case kSuspend: res_reg =  RegStorage::InvalidReg(); break;
+    case kLr: res_reg =  RegStorage::InvalidReg(); break;
+    case kPc: res_reg =  RegStorage::InvalidReg(); break;
+    case kSp: res_reg =  rs_rX86_SP; break;
+    case kArg0: res_reg = rs_rX86_ARG0; break;
+    case kArg1: res_reg = rs_rX86_ARG1; break;
+    case kArg2: res_reg = rs_rX86_ARG2; break;
+    case kArg3: res_reg = rs_rX86_ARG3; break;
+    case kFArg0: res_reg = rs_rX86_FARG0; break;
+    case kFArg1: res_reg = rs_rX86_FARG1; break;
+    case kFArg2: res_reg = rs_rX86_FARG2; break;
+    case kFArg3: res_reg = rs_rX86_FARG3; break;
+    case kRet0: res_reg = rs_rX86_RET0; break;
+    case kRet1: res_reg = rs_rX86_RET1; break;
+    case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
+    case kHiddenArg: res_reg = rs_rAX; break;
+    case kHiddenFpArg: res_reg = rs_fr0; break;
+    case kCount: res_reg = rs_rX86_COUNT; break;
   }
-  return RegStorage::Solo32(res_reg);
+  return res_reg;
 }
 
 RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
@@ -106,34 +197,19 @@
   }
 }
 
-// Create a double from a pair of singles.
-int X86Mir2Lir::S2d(int low_reg, int high_reg) {
-  return X86_S2D(low_reg, high_reg);
-}
-
-// Return mask to strip off fp reg flags and bias.
-uint32_t X86Mir2Lir::FpRegMask() {
-  return X86_FP_REG_MASK;
-}
-
-// True if both regs single, both core or both double.
-bool X86Mir2Lir::SameRegType(int reg1, int reg2) {
-  return (X86_REGTYPE(reg1) == X86_REGTYPE(reg2));
-}
-
 /*
  * Decode the register id.
  */
-uint64_t X86Mir2Lir::GetRegMaskCommon(int reg) {
+uint64_t X86Mir2Lir::GetRegMaskCommon(RegStorage reg) {
   uint64_t seed;
   int shift;
   int reg_id;
 
-  reg_id = reg & 0xf;
+  reg_id = reg.GetRegNum();
   /* Double registers in x86 are just a single FP register */
   seed = 1;
   /* FP register starts at bit position 16 */
-  shift = X86_FPREG(reg) ? kX86FPReg0 : 0;
+  shift = reg.IsFloat() ? kX86FPReg0 : 0;
   /* Expand the double register id into single offset */
   shift += reg_id;
   return (seed << shift);
@@ -162,34 +238,34 @@
   }
 
   if (flags & REG_DEFA) {
-    SetupRegMask(&lir->u.m.def_mask, rAX);
+    SetupRegMask(&lir->u.m.def_mask, rs_rAX.GetReg());
   }
 
   if (flags & REG_DEFD) {
-    SetupRegMask(&lir->u.m.def_mask, rDX);
+    SetupRegMask(&lir->u.m.def_mask, rs_rDX.GetReg());
   }
   if (flags & REG_USEA) {
-    SetupRegMask(&lir->u.m.use_mask, rAX);
+    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
   }
 
   if (flags & REG_USEC) {
-    SetupRegMask(&lir->u.m.use_mask, rCX);
+    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
   }
 
   if (flags & REG_USED) {
-    SetupRegMask(&lir->u.m.use_mask, rDX);
+    SetupRegMask(&lir->u.m.use_mask, rs_rDX.GetReg());
   }
 
   if (flags & REG_USEB) {
-    SetupRegMask(&lir->u.m.use_mask, rBX);
+    SetupRegMask(&lir->u.m.use_mask, rs_rBX.GetReg());
   }
 
   // Fixup hard to describe instruction: Uses rAX, rCX, rDI; sets rDI.
   if (lir->opcode == kX86RepneScasw) {
-    SetupRegMask(&lir->u.m.use_mask, rAX);
-    SetupRegMask(&lir->u.m.use_mask, rCX);
-    SetupRegMask(&lir->u.m.use_mask, rDI);
-    SetupRegMask(&lir->u.m.def_mask, rDI);
+    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
+    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
+    SetupRegMask(&lir->u.m.use_mask, rs_rDI.GetReg());
+    SetupRegMask(&lir->u.m.def_mask, rs_rDI.GetReg());
   }
 
   if (flags & USE_FP_STACK) {
@@ -261,12 +337,13 @@
             break;
           }
           case 'r':
-            if (X86_FPREG(operand) || X86_DOUBLEREG(operand)) {
-              int fp_reg = operand & X86_FP_REG_MASK;
+            if (RegStorage::IsFloat(operand)) {
+              int fp_reg = RegStorage::RegNum(operand);
               buf += StringPrintf("xmm%d", fp_reg);
             } else {
-              DCHECK_LT(static_cast<size_t>(operand), sizeof(x86RegName));
-              buf += x86RegName[operand];
+              int reg_num = RegStorage::RegNum(operand);
+              DCHECK_LT(static_cast<size_t>(reg_num), sizeof(x86RegName));
+              buf += x86RegName[reg_num];
             }
             break;
           case 't':
@@ -329,7 +406,7 @@
 
 void X86Mir2Lir::AdjustSpillMask() {
   // Adjustment for LR spilling, x86 has no LR so nothing to do here
-  core_spill_mask_ |= (1 << rRET);
+  core_spill_mask_ |= (1 << rs_rRET.GetRegNum());
   num_core_spills_++;
 }
 
@@ -339,97 +416,60 @@
  * include any holes in the mask.  Associate holes with
  * Dalvik register INVALID_VREG (0xFFFFU).
  */
-void X86Mir2Lir::MarkPreservedSingle(int v_reg, int reg) {
-  UNIMPLEMENTED(WARNING) << "MarkPreservedSingle";
-#if 0
-  LOG(FATAL) << "No support yet for promoted FP regs";
-#endif
+void X86Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
+  UNIMPLEMENTED(FATAL) << "MarkPreservedSingle";
 }
 
-void X86Mir2Lir::FlushRegWide(RegStorage reg) {
-  RegisterInfo* info1 = GetRegInfo(reg.GetLowReg());
-  RegisterInfo* info2 = GetRegInfo(reg.GetHighReg());
-  DCHECK(info1 && info2 && info1->pair && info2->pair &&
-         (info1->partner == info2->reg) &&
-         (info2->partner == info1->reg));
-  if ((info1->live && info1->dirty) || (info2->live && info2->dirty)) {
-    if (!(info1->is_temp && info2->is_temp)) {
-      /* Should not happen.  If it does, there's a problem in eval_loc */
-      LOG(FATAL) << "Long half-temp, half-promoted";
-    }
-
-    info1->dirty = false;
-    info2->dirty = false;
-    if (mir_graph_->SRegToVReg(info2->s_reg) < mir_graph_->SRegToVReg(info1->s_reg))
-      info1 = info2;
-    int v_reg = mir_graph_->SRegToVReg(info1->s_reg);
-    StoreBaseDispWide(rs_rX86_SP, VRegOffset(v_reg),
-                      RegStorage(RegStorage::k64BitPair, info1->reg, info1->partner));
-  }
+void X86Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
+  UNIMPLEMENTED(FATAL) << "MarkPreservedDouble";
 }
 
-void X86Mir2Lir::FlushReg(RegStorage reg) {
-  // FIXME: need to handle 32 bits in 64-bit register as well as wide values held in single reg.
-  DCHECK(!reg.IsPair());
-  RegisterInfo* info = GetRegInfo(reg.GetReg());
-  if (info->live && info->dirty) {
-    info->dirty = false;
-    int v_reg = mir_graph_->SRegToVReg(info->s_reg);
-    StoreBaseDisp(rs_rX86_SP, VRegOffset(v_reg), reg, k32);
-  }
-}
-
-/* Give access to the target-dependent FP register encoding to common code */
-bool X86Mir2Lir::IsFpReg(int reg) {
-  return X86_FPREG(reg);
-}
-
-bool X86Mir2Lir::IsFpReg(RegStorage reg) {
-  return IsFpReg(reg.IsPair() ? reg.GetLowReg() : reg.GetReg());
+RegStorage X86Mir2Lir::AllocateByteRegister() {
+  return AllocTypedTemp(false, kCoreReg);
 }
 
 /* Clobber all regs that might be used by an external C call */
 void X86Mir2Lir::ClobberCallerSave() {
-  Clobber(rAX);
-  Clobber(rCX);
-  Clobber(rDX);
-  Clobber(rBX);
+  Clobber(rs_rAX);
+  Clobber(rs_rCX);
+  Clobber(rs_rDX);
+  Clobber(rs_rBX);
 }
 
 RegLocation X86Mir2Lir::GetReturnWideAlt() {
   RegLocation res = LocCReturnWide();
-  CHECK(res.reg.GetLowReg() == rAX);
-  CHECK(res.reg.GetHighReg() == rDX);
-  Clobber(rAX);
-  Clobber(rDX);
-  MarkInUse(rAX);
-  MarkInUse(rDX);
-  MarkPair(res.reg.GetLowReg(), res.reg.GetHighReg());
+  DCHECK(res.reg.GetLowReg() == rs_rAX.GetReg());
+  DCHECK(res.reg.GetHighReg() == rs_rDX.GetReg());
+  Clobber(rs_rAX);
+  Clobber(rs_rDX);
+  MarkInUse(rs_rAX);
+  MarkInUse(rs_rDX);
+  MarkWide(res.reg);
   return res;
 }
 
 RegLocation X86Mir2Lir::GetReturnAlt() {
   RegLocation res = LocCReturn();
-  res.reg.SetReg(rDX);
-  Clobber(rDX);
-  MarkInUse(rDX);
+  res.reg.SetReg(rs_rDX.GetReg());
+  Clobber(rs_rDX);
+  MarkInUse(rs_rDX);
   return res;
 }
 
 /* To be used when explicitly managing register use */
 void X86Mir2Lir::LockCallTemps() {
-  LockTemp(rX86_ARG0);
-  LockTemp(rX86_ARG1);
-  LockTemp(rX86_ARG2);
-  LockTemp(rX86_ARG3);
+  LockTemp(rs_rX86_ARG0);
+  LockTemp(rs_rX86_ARG1);
+  LockTemp(rs_rX86_ARG2);
+  LockTemp(rs_rX86_ARG3);
 }
 
 /* To be used when explicitly managing register use */
 void X86Mir2Lir::FreeCallTemps() {
-  FreeTemp(rX86_ARG0);
-  FreeTemp(rX86_ARG1);
-  FreeTemp(rX86_ARG2);
-  FreeTemp(rX86_ARG3);
+  FreeTemp(rs_rX86_ARG0);
+  FreeTemp(rs_rX86_ARG1);
+  FreeTemp(rs_rX86_ARG2);
+  FreeTemp(rs_rX86_ARG3);
 }
 
 bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
@@ -450,11 +490,12 @@
     return false;
 }
 
-void X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
+bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
 #if ANDROID_SMP != 0
   // Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
   LIR* mem_barrier = last_lir_insn_;
 
+  bool ret = false;
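+  // Set to true below only if a new mfence instruction has to be emitted.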
   /*
    * According to the JSR-133 Cookbook, for x86 only StoreLoad barriers need memory fence. All other barriers
    * (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory model. For those cases, all we need
@@ -464,11 +505,13 @@
     // If no LIR exists already that can be used a barrier, then generate an mfence.
     if (mem_barrier == nullptr) {
       mem_barrier = NewLIR0(kX86Mfence);
+      ret = true;
     }
 
     // If last instruction does not provide full barrier, then insert an mfence.
     if (ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode)) == false) {
       mem_barrier = NewLIR0(kX86Mfence);
+      ret = true;
     }
   }
 
@@ -480,70 +523,43 @@
     DCHECK(!mem_barrier->flags.use_def_invalid);
     mem_barrier->u.m.def_mask = ENCODE_ALL;
   }
+  return ret;
+#else
+  return false;
 #endif
 }
 
-// Alloc a pair of core registers, or a double.
-RegStorage X86Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
-  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
-    return AllocTempDouble();
-  }
-  RegStorage low_reg = AllocTemp();
-  RegStorage high_reg = AllocTemp();
-  return RegStorage::MakeRegPair(low_reg, high_reg);
-}
-
-RegStorage X86Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
-  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
-    return AllocTempFloat();
-  }
-  return AllocTemp();
-}
-
 void X86Mir2Lir::CompilerInitializeRegAlloc() {
-  int num_regs = sizeof(core_regs)/sizeof(*core_regs);
-  int num_reserved = sizeof(ReservedRegs)/sizeof(*ReservedRegs);
-  int num_temps = sizeof(core_temps)/sizeof(*core_temps);
-  int num_fp_regs = sizeof(FpRegs)/sizeof(*FpRegs);
-  int num_fp_temps = sizeof(fp_temps)/sizeof(*fp_temps);
-  reg_pool_ = static_cast<RegisterPool*>(arena_->Alloc(sizeof(*reg_pool_),
-                                                       kArenaAllocRegAlloc));
-  reg_pool_->num_core_regs = num_regs;
-  reg_pool_->core_regs =
-      static_cast<RegisterInfo*>(arena_->Alloc(num_regs * sizeof(*reg_pool_->core_regs),
-                                               kArenaAllocRegAlloc));
-  reg_pool_->num_fp_regs = num_fp_regs;
-  reg_pool_->FPRegs =
-      static_cast<RegisterInfo *>(arena_->Alloc(num_fp_regs * sizeof(*reg_pool_->FPRegs),
-                                                kArenaAllocRegAlloc));
-  CompilerInitPool(reg_pool_->core_regs, core_regs, reg_pool_->num_core_regs);
-  CompilerInitPool(reg_pool_->FPRegs, FpRegs, reg_pool_->num_fp_regs);
-  // Keep special registers from being allocated
-  for (int i = 0; i < num_reserved; i++) {
-    MarkInUse(ReservedRegs[i]);
+  if (Gen64Bit()) {
+    reg_pool_ = new (arena_) RegisterPool(this, arena_, empty_pool, core_regs_64, sp_regs_64,
+                                          dp_regs_64, empty_pool, reserved_regs_64,
+                                          empty_pool, core_temps_64, sp_temps_64, dp_temps_64);
+  } else {
+    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, empty_pool, sp_regs_32,
+                                          dp_regs_32, reserved_regs_32, empty_pool,
+                                          core_temps_32, empty_pool, sp_temps_32, dp_temps_32);
   }
-  // Mark temp regs - all others not in use can be used for promotion
-  for (int i = 0; i < num_temps; i++) {
-    MarkTemp(core_temps[i]);
-  }
-  for (int i = 0; i < num_fp_temps; i++) {
-    MarkTemp(fp_temps[i]);
-  }
-}
 
-void X86Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
-  DCHECK(rl_keep.wide);
-  DCHECK(rl_free.wide);
-  int free_low = rl_free.reg.GetLowReg();
-  int free_high = rl_free.reg.GetHighReg();
-  int keep_low = rl_keep.reg.GetLowReg();
-  int keep_high = rl_keep.reg.GetHighReg();
-  if ((free_low != keep_low) && (free_low != keep_high) &&
-      (free_high != keep_low) && (free_high != keep_high)) {
-    // No overlap, free both
-    FreeTemp(free_low);
-    FreeTemp(free_high);
+  // Target-specific adjustments.
+
+  // Alias single precision xmm to double xmms.
+  // TODO: as needed, add larger vector sizes - alias all to the largest.
+  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
+  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
+    int sp_reg_num = info->GetReg().GetRegNum();
+    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num);
+    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
+    // 64-bit xmm vector register's master storage should refer to itself.
+    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
+    // Redirect 32-bit vector's master storage to 64-bit vector.
+    info->SetMaster(dp_reg_info);
   }
+
+  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
+  // TODO: adjust for x86/hard float calling convention.
+  reg_pool_->next_core_reg_ = 2;
+  reg_pool_->next_sp_reg_ = 2;
+  reg_pool_->next_dp_reg_ = 1;
 }
 
 void X86Mir2Lir::SpillCoreRegs() {
@@ -551,12 +567,12 @@
     return;
   }
   // Spill mask not including fake return address register
-  uint32_t mask = core_spill_mask_ & ~(1 << rRET);
-  int offset = frame_size_ - (4 * num_core_spills_);
+  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
+  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
   for (int reg = 0; mask; mask >>= 1, reg++) {
     if (mask & 0x1) {
       StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
-      offset += 4;
+      offset += GetInstructionSetPointerSize(cu_->instruction_set);
     }
   }
 }
@@ -566,12 +582,12 @@
     return;
   }
   // Spill mask not including fake return address register
-  uint32_t mask = core_spill_mask_ & ~(1 << rRET);
-  int offset = frame_size_ - (4 * num_core_spills_);
+  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
+  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
   for (int reg = 0; mask; mask >>= 1, reg++) {
     if (mask & 0x1) {
       LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
-      offset += 4;
+      offset += GetInstructionSetPointerSize(cu_->instruction_set);
     }
   }
 }
@@ -580,27 +596,89 @@
   return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
 }
 
-X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
+bool X86Mir2Lir::SupportsVolatileLoadStore(OpSize size) {
+  return true;
+}
+
+RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
+  if (UNLIKELY(is_volatile)) {
+    // On x86, atomic 64-bit load/store requires an fp register.
+    // Smaller aligned load/store is atomic for both core and fp registers.
+    if (size == k64 || size == kDouble) {
+      return kFPReg;
+    }
+  }
+  return RegClassBySize(size);
+}
+
+X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit)
     : Mir2Lir(cu, mir_graph, arena),
       base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
       method_address_insns_(arena, 100, kGrowableArrayMisc),
       class_type_address_insns_(arena, 100, kGrowableArrayMisc),
       call_method_insns_(arena, 100, kGrowableArrayMisc),
-      stack_decrement_(nullptr), stack_increment_(nullptr) {
+      stack_decrement_(nullptr), stack_increment_(nullptr), gen64bit_(gen64bit),
+      const_vectors_(nullptr) {
+  store_method_addr_used_ = false;
   if (kIsDebugBuild) {
     for (int i = 0; i < kX86Last; i++) {
       if (X86Mir2Lir::EncodingMap[i].opcode != i) {
         LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
-            << " is wrong: expecting " << i << ", seeing "
-            << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
+                   << " is wrong: expecting " << i << ", seeing "
+                   << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
       }
     }
   }
+  if (Gen64Bit()) {
+    rs_rX86_SP = rs_rX86_SP_64;
+
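+    // The first four integer argument registers of the x86-64 (System V) calling convention.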
+    rs_rX86_ARG0 = rs_rDI;
+    rs_rX86_ARG1 = rs_rSI;
+    rs_rX86_ARG2 = rs_rDX;
+    rs_rX86_ARG3 = rs_rCX;
+    rX86_ARG0 = rDI;
+    rX86_ARG1 = rSI;
+    rX86_ARG2 = rDX;
+    rX86_ARG3 = rCX;
+    // TODO: ARG4(r8), ARG5(r9), floating point args.
+  } else {
+    rs_rX86_SP = rs_rX86_SP_32;
+
+    rs_rX86_ARG0 = rs_rAX;
+    rs_rX86_ARG1 = rs_rCX;
+    rs_rX86_ARG2 = rs_rDX;
+    rs_rX86_ARG3 = rs_rBX;
+    rX86_ARG0 = rAX;
+    rX86_ARG1 = rCX;
+    rX86_ARG2 = rDX;
+    rX86_ARG3 = rBX;
+  }
+  rs_rX86_FARG0 = rs_rAX;
+  rs_rX86_FARG1 = rs_rCX;
+  rs_rX86_FARG2 = rs_rDX;
+  rs_rX86_FARG3 = rs_rBX;
+  rs_rX86_RET0 = rs_rAX;
+  rs_rX86_RET1 = rs_rDX;
+  rs_rX86_INVOKE_TGT = rs_rAX;
+  rs_rX86_COUNT = rs_rCX;
+  rX86_FARG0 = rAX;
+  rX86_FARG1 = rCX;
+  rX86_FARG2 = rDX;
+  rX86_FARG3 = rBX;
+  rX86_RET0 = rAX;
+  rX86_RET1 = rDX;
+  rX86_INVOKE_TGT = rAX;
+  rX86_COUNT = rCX;
 }
 
 Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                           ArenaAllocator* const arena) {
-  return new X86Mir2Lir(cu, mir_graph, arena);
+  return new X86Mir2Lir(cu, mir_graph, arena, false);
+}
+
+Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
+                          ArenaAllocator* const arena) {
+  return new X86Mir2Lir(cu, mir_graph, arena, true);
 }
 
 // Not used in x86
@@ -609,6 +687,12 @@
   return RegStorage::InvalidReg();
 }
 
+// Not used in x86
+RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<8> offset) {
+  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
+  return RegStorage::InvalidReg();
+}
+
 LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
   LOG(FATAL) << "Unexpected use of CheckSuspendUsingLoad in x86";
   return nullptr;
@@ -629,215 +713,6 @@
   return X86Mir2Lir::EncodingMap[opcode].fmt;
 }
 
-/*
- * Return an updated location record with current in-register status.
- * If the value lives in live temps, reflect that fact.  No code
- * is generated.  If the live value is part of an older pair,
- * clobber both low and high.
- */
-// TODO: Reunify with common code after 'pair mess' has been fixed
-RegLocation X86Mir2Lir::UpdateLocWide(RegLocation loc) {
-  DCHECK(loc.wide);
-  DCHECK(CheckCorePoolSanity());
-  if (loc.location != kLocPhysReg) {
-    DCHECK((loc.location == kLocDalvikFrame) ||
-         (loc.location == kLocCompilerTemp));
-    // Are the dalvik regs already live in physical registers?
-    RegisterInfo* info_lo = AllocLive(loc.s_reg_low, kAnyReg);
-
-    // Handle FP registers specially on x86.
-    if (info_lo && IsFpReg(info_lo->reg)) {
-      bool match = true;
-
-      // We can't match a FP register with a pair of Core registers.
-      match = match && (info_lo->pair == 0);
-
-      if (match) {
-        // We can reuse;update the register usage info.
-        loc.location = kLocPhysReg;
-        loc.vec_len = kVectorLength8;
-        // TODO: use k64BitVector
-        loc.reg = RegStorage(RegStorage::k64BitPair, info_lo->reg, info_lo->reg);
-        DCHECK(IsFpReg(loc.reg.GetLowReg()));
-        return loc;
-      }
-      // We can't easily reuse; clobber and free any overlaps.
-      if (info_lo) {
-        Clobber(info_lo->reg);
-        FreeTemp(info_lo->reg);
-        if (info_lo->pair)
-          Clobber(info_lo->partner);
-      }
-    } else {
-      RegisterInfo* info_hi = AllocLive(GetSRegHi(loc.s_reg_low), kAnyReg);
-      bool match = true;
-      match = match && (info_lo != NULL);
-      match = match && (info_hi != NULL);
-      // Are they both core or both FP?
-      match = match && (IsFpReg(info_lo->reg) == IsFpReg(info_hi->reg));
-      // If a pair of floating point singles, are they properly aligned?
-      if (match && IsFpReg(info_lo->reg)) {
-        match &= ((info_lo->reg & 0x1) == 0);
-        match &= ((info_hi->reg - info_lo->reg) == 1);
-      }
-      // If previously used as a pair, it is the same pair?
-      if (match && (info_lo->pair || info_hi->pair)) {
-        match = (info_lo->pair == info_hi->pair);
-        match &= ((info_lo->reg == info_hi->partner) &&
-              (info_hi->reg == info_lo->partner));
-      }
-      if (match) {
-        // Can reuse - update the register usage info
-        loc.reg = RegStorage(RegStorage::k64BitPair, info_lo->reg, info_hi->reg);
-        loc.location = kLocPhysReg;
-        MarkPair(loc.reg.GetLowReg(), loc.reg.GetHighReg());
-        DCHECK(!IsFpReg(loc.reg.GetLowReg()) || ((loc.reg.GetLowReg() & 0x1) == 0));
-        return loc;
-      }
-      // Can't easily reuse - clobber and free any overlaps
-      if (info_lo) {
-        Clobber(info_lo->reg);
-        FreeTemp(info_lo->reg);
-        if (info_lo->pair)
-          Clobber(info_lo->partner);
-      }
-      if (info_hi) {
-        Clobber(info_hi->reg);
-        FreeTemp(info_hi->reg);
-        if (info_hi->pair)
-          Clobber(info_hi->partner);
-      }
-    }
-  }
-  return loc;
-}
-
-// TODO: Reunify with common code after 'pair mess' has been fixed
-RegLocation X86Mir2Lir::EvalLocWide(RegLocation loc, int reg_class, bool update) {
-  DCHECK(loc.wide);
-
-  loc = UpdateLocWide(loc);
-
-  /* If it is already in a register, we can assume proper form.  Is it the right reg class? */
-  if (loc.location == kLocPhysReg) {
-    DCHECK_EQ(IsFpReg(loc.reg.GetLowReg()), loc.IsVectorScalar());
-    if (!RegClassMatches(reg_class, loc.reg)) {
-      /* It is the wrong register class.  Reallocate and copy. */
-      if (!IsFpReg(loc.reg.GetLowReg())) {
-        // We want this in a FP reg, and it is in core registers.
-        DCHECK(reg_class != kCoreReg);
-        // Allocate this into any FP reg, and mark it with the right size.
-        int32_t low_reg = AllocTypedTemp(true, reg_class).GetReg();
-        OpVectorRegCopyWide(low_reg, loc.reg.GetLowReg(), loc.reg.GetHighReg());
-        CopyRegInfo(low_reg, loc.reg.GetLowReg());
-        Clobber(loc.reg);
-        loc.reg.SetReg(low_reg);
-        loc.reg.SetHighReg(low_reg);  // Play nice with existing code.
-        loc.vec_len = kVectorLength8;
-      } else {
-        // The value is in a FP register, and we want it in a pair of core registers.
-        DCHECK_EQ(reg_class, kCoreReg);
-        DCHECK_EQ(loc.reg.GetLowReg(), loc.reg.GetHighReg());
-        RegStorage new_regs = AllocTypedTempWide(false, kCoreReg);  // Force to core registers.
-        OpRegCopyWide(new_regs, loc.reg);
-        CopyRegInfo(new_regs.GetLowReg(), loc.reg.GetLowReg());
-        CopyRegInfo(new_regs.GetHighReg(), loc.reg.GetHighReg());
-        Clobber(loc.reg);
-        loc.reg = new_regs;
-        MarkPair(loc.reg.GetLowReg(), loc.reg.GetHighReg());
-        DCHECK(!IsFpReg(loc.reg.GetLowReg()) || ((loc.reg.GetLowReg() & 0x1) == 0));
-      }
-    }
-    return loc;
-  }
-
-  DCHECK_NE(loc.s_reg_low, INVALID_SREG);
-  DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG);
-
-  loc.reg = AllocTypedTempWide(loc.fp, reg_class);
-
-  // FIXME: take advantage of RegStorage notation.
-  if (loc.reg.GetLowReg() == loc.reg.GetHighReg()) {
-    DCHECK(IsFpReg(loc.reg.GetLowReg()));
-    loc.vec_len = kVectorLength8;
-  } else {
-    MarkPair(loc.reg.GetLowReg(), loc.reg.GetHighReg());
-  }
-  if (update) {
-    loc.location = kLocPhysReg;
-    MarkLive(loc.reg.GetLow(), loc.s_reg_low);
-    if (loc.reg.GetLowReg() != loc.reg.GetHighReg()) {
-      MarkLive(loc.reg.GetHigh(), GetSRegHi(loc.s_reg_low));
-    }
-  }
-  return loc;
-}
-
-// TODO: Reunify with common code after 'pair mess' has been fixed
-RegLocation X86Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool update) {
-  if (loc.wide)
-    return EvalLocWide(loc, reg_class, update);
-
-  loc = UpdateLoc(loc);
-
-  if (loc.location == kLocPhysReg) {
-    if (!RegClassMatches(reg_class, loc.reg)) {
-      /* Wrong register class.  Realloc, copy and transfer ownership. */
-      RegStorage new_reg = AllocTypedTemp(loc.fp, reg_class);
-      OpRegCopy(new_reg, loc.reg);
-      CopyRegInfo(new_reg, loc.reg);
-      Clobber(loc.reg);
-      loc.reg = new_reg;
-      if (IsFpReg(loc.reg.GetReg()) && reg_class != kCoreReg)
-        loc.vec_len = kVectorLength4;
-    }
-    return loc;
-  }
-
-  DCHECK_NE(loc.s_reg_low, INVALID_SREG);
-
-  loc.reg = AllocTypedTemp(loc.fp, reg_class);
-  if (IsFpReg(loc.reg.GetReg()) && reg_class != kCoreReg)
-    loc.vec_len = kVectorLength4;
-
-  if (update) {
-    loc.location = kLocPhysReg;
-    MarkLive(loc.reg, loc.s_reg_low);
-  }
-  return loc;
-}
-
-RegStorage X86Mir2Lir::AllocTempDouble() {
-  // We really don't need a pair of registers.
-  // FIXME - update to double
-  int reg = AllocTempFloat().GetReg();
-  return RegStorage(RegStorage::k64BitPair, reg, reg);
-}
-
-// TODO: Reunify with common code after 'pair mess' has been fixed
-void X86Mir2Lir::ResetDefLocWide(RegLocation rl) {
-  DCHECK(rl.wide);
-  RegisterInfo* p_low = IsTemp(rl.reg.GetLowReg());
-  if (IsFpReg(rl.reg.GetLowReg())) {
-    // We are using only the low register.
-    if (p_low && !(cu_->disable_opt & (1 << kSuppressLoads))) {
-      NullifyRange(p_low->def_start, p_low->def_end, p_low->s_reg, rl.s_reg_low);
-    }
-    ResetDef(rl.reg.GetLowReg());
-  } else {
-    RegisterInfo* p_high = IsTemp(rl.reg.GetHighReg());
-    if (p_low && !(cu_->disable_opt & (1 << kSuppressLoads))) {
-      DCHECK(p_low->pair);
-      NullifyRange(p_low->def_start, p_low->def_end, p_low->s_reg, rl.s_reg_low);
-    }
-    if (p_high && !(cu_->disable_opt & (1 << kSuppressLoads))) {
-      DCHECK(p_high->pair);
-    }
-    ResetDef(rl.reg.GetLowReg());
-    ResetDef(rl.reg.GetHighReg());
-  }
-}
-
 void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
   // Can we do this directly to memory?
   rl_dest = UpdateLocWide(rl_dest);
@@ -872,7 +747,6 @@
              << (loc.ref ? " r" : "  ")
              << (loc.high_word ? " h" : "  ")
              << (loc.home ? " H" : "  ")
-             << " vec_len: " << loc.vec_len
              << ", low: " << static_cast<int>(loc.reg.GetLowReg())
              << ", high: " << static_cast<int>(loc.reg.GetHighReg())
              << ", s_reg: " << loc.s_reg_low
@@ -942,12 +816,46 @@
   return call;
 }
 
+/*
+ * @brief Enter a 32-bit quantity into a buffer in little-endian order.
+ * @param buf Target buffer.
+ * @param data Data value.
+ */
+static void PushWord(std::vector<uint8_t>& buf, int32_t data) {
+  buf.push_back(data & 0xff);
+  buf.push_back((data >> 8) & 0xff);
+  buf.push_back((data >> 16) & 0xff);
+  buf.push_back((data >> 24) & 0xff);
+}
+
 void X86Mir2Lir::InstallLiteralPools() {
   // These are handled differently for x86.
   DCHECK(code_literal_list_ == nullptr);
   DCHECK(method_literal_list_ == nullptr);
   DCHECK(class_literal_list_ == nullptr);
 
+  // Align to a 16-byte boundary.  This relies on the implicit assumption that the start of the
+  // method is on a 4-byte boundary; if that ever changes there is no compile-time check here,
+  // and aligned loads will fail at runtime.
+  if (const_vectors_ != nullptr) {
+    int align_size = (16-4) - (code_buffer_.size() & 0xF);
+    if (align_size < 0) {
+      align_size += 16;
+    }
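+    // After the padding below, code_buffer_.size() % 16 == 12; combined with the 4-byte
+    // method start assumption above, the vector literals end up 16-byte aligned.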
+
+    while (align_size > 0) {
+      code_buffer_.push_back(0);
+      align_size--;
+    }
+    for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
+      PushWord(code_buffer_, p->operands[0]);
+      PushWord(code_buffer_, p->operands[1]);
+      PushWord(code_buffer_, p->operands[2]);
+      PushWord(code_buffer_, p->operands[3]);
+    }
+  }
+
   // Handle the fixups for methods.
   for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
       LIR* p = method_address_insns_.Get(i);
@@ -1059,7 +967,7 @@
 
   // We need to preserve EDI, but have no spare registers, so push it on the stack.
   // We have to remember that all stack addresses after this are offset by sizeof(EDI).
-  NewLIR1(kX86Push32R, rDI);
+  NewLIR1(kX86Push32R, rs_rDI.GetReg());
 
   // Compute the number of words to search in to rCX.
   Load32Disp(rs_rDX, count_offset, rs_rCX);
@@ -1084,7 +992,7 @@
       }
     } else {
       // Runtime start index.
-      rl_start = UpdateLoc(rl_start);
+      rl_start = UpdateLocTyped(rl_start, kCoreReg);
       if (rl_start.location == kLocPhysReg) {
         // Handle "start index < 0" case.
         OpRegReg(kOpXor, rs_rBX, rs_rBX);
@@ -1096,7 +1004,7 @@
         OpRegReg(kOpSub, rs_rCX, rl_start.reg);
         if (rl_start.reg == rs_rDI) {
           // The special case. We will use EDI further, so lets put start index to stack.
-          NewLIR1(kX86Push32R, rDI);
+          NewLIR1(kX86Push32R, rs_rDI.GetReg());
           is_index_on_stack = true;
         }
       } else {
@@ -1110,7 +1018,7 @@
         length_compare = OpCmpBranch(kCondLe, rs_rCX, rs_rBX, nullptr);
         OpRegReg(kOpSub, rs_rCX, rs_rBX);
         // Put the start index to stack.
-        NewLIR1(kX86Push32R, rBX);
+        NewLIR1(kX86Push32R, rs_rBX.GetReg());
         is_index_on_stack = true;
       }
     }
@@ -1130,12 +1038,12 @@
     if (start_value == 0) {
       OpRegCopy(rs_rDI, rs_rBX);
     } else {
-      NewLIR3(kX86Lea32RM, rDI, rBX, 2 * start_value);
+      NewLIR3(kX86Lea32RM, rs_rDI.GetReg(), rs_rBX.GetReg(), 2 * start_value);
     }
   } else {
     if (is_index_on_stack == true) {
       // Load the start index from stack.
-      NewLIR1(kX86Pop32R, rDX);
+      NewLIR1(kX86Pop32R, rs_rDX.GetReg());
       OpLea(rs_rDI, rs_rBX, rs_rDX, 1, 0);
     } else {
       OpLea(rs_rDI, rs_rBX, rl_start.reg, 1, 0);
@@ -1153,7 +1061,7 @@
   // index = ((curr_ptr - orig_ptr) / 2) - 1.
   OpRegReg(kOpSub, rs_rDI, rs_rBX);
   OpRegImm(kOpAsr, rs_rDI, 1);
-  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rDI, -1);
+  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_rDI.GetReg(), -1);
   LIR *all_done = NewLIR1(kX86Jmp8, 0);
 
   // Failed to match; return -1.
@@ -1165,7 +1073,7 @@
   // And join up at the end.
   all_done->target = NewLIR0(kPseudoTargetLabel);
   // Restore EDI from the stack.
-  NewLIR1(kX86Pop32R, rDI);
+  NewLIR1(kX86Pop32R, rs_rDI.GetReg());
 
   // Out of line code returns here.
   if (slowpath_branch != nullptr) {
@@ -1178,18 +1086,6 @@
 }
 
 /*
- * @brief Enter a 32 bit quantity into the FDE buffer
- * @param buf FDE buffer.
- * @param data Data value.
- */
-static void PushWord(std::vector<uint8_t>&buf, int data) {
-  buf.push_back(data & 0xff);
-  buf.push_back((data >> 8) & 0xff);
-  buf.push_back((data >> 16) & 0xff);
-  buf.push_back((data >> 24) & 0xff);
-}
-
-/*
  * @brief Enter an 'advance LOC' into the FDE buffer
  * @param buf FDE buffer.
  * @param increment Amount by which to increase the current location.
@@ -1339,4 +1235,73 @@
   return cfi_info;
 }
 
+void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
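+  // Dispatch x86-specific extended MIR opcodes; unrecognized opcodes are silently ignored.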
+  switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
+    case kMirOpConstVector:
+      GenConst128(bb, mir);
+      break;
+    default:
+      break;
+  }
+}
+
+void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) {
+  int type_size = mir->dalvikInsn.vA;
+  // We support 128 bit vectors.
+  DCHECK_EQ(type_size & 0xFFFF, 128);
+  int reg = mir->dalvikInsn.vB;
+  DCHECK_LT(reg, 8);
+  uint32_t *args = mir->dalvikInsn.arg;
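+  // arg[0]..arg[3] hold the four 32-bit words of the 128-bit constant.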
+  // Check for all 0 case.
+  if (args[0] == 0 && args[1] == 0 && args[2] == 0 && args[3] == 0) {
+    NewLIR2(kX86XorpsRR, reg, reg);
+    return;
+  }
+  // Okay, load it from the constant vector area.
+  LIR *data_target = ScanVectorLiteral(mir);
+  if (data_target == nullptr) {
+    data_target = AddVectorLiteral(mir);
+  }
+
+  // Address the start of the method.
+  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+  rl_method = LoadValue(rl_method, kCoreReg);
+
+  // Load the proper value from the literal area.
+  // We don't know the proper offset for the value, so pick one that will force
+  // 4 byte offset.  We will fix this up in the assembler later to have the right
+  // value.
+  LIR *load = NewLIR3(kX86Mova128RM, reg, rl_method.reg.GetReg(),  256 /* bogus */);
+  load->flags.fixup = kFixupLoad;
+  load->target = data_target;
+  SetMemRefType(load, true, kLiteral);
+}
+
+LIR *X86Mir2Lir::ScanVectorLiteral(MIR *mir) {
+  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
+  for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
+    if (args[0] == p->operands[0] && args[1] == p->operands[1] &&
+        args[2] == p->operands[2] && args[3] == p->operands[3]) {
+      return p;
+    }
+  }
+  return nullptr;
+}
+
+LIR *X86Mir2Lir::AddVectorLiteral(MIR *mir) {
+  LIR* new_value = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocData));
+  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
+  new_value->operands[0] = args[0];
+  new_value->operands[1] = args[1];
+  new_value->operands[2] = args[2];
+  new_value->operands[3] = args[3];
+  new_value->next = const_vectors_;
+  if (const_vectors_ == nullptr) {
+    estimated_native_code_size_ += 12;  // Amount needed to align to 16 byte boundary.
+  }
+  estimated_native_code_size_ += 16;  // Space for one vector.
+  const_vectors_ = new_value;
+  return new_value;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 4d45055..fed31c1 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -26,18 +26,19 @@
 LIR* X86Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
   int opcode;
   /* must be both DOUBLE or both not DOUBLE */
-  DCHECK_EQ(X86_DOUBLEREG(r_dest.GetReg()), X86_DOUBLEREG(r_src.GetReg()));
-  if (X86_DOUBLEREG(r_dest.GetReg())) {
+  DCHECK(r_dest.IsFloat() || r_src.IsFloat());
+  DCHECK_EQ(r_dest.IsDouble(), r_src.IsDouble());
+  if (r_dest.IsDouble()) {
     opcode = kX86MovsdRR;
   } else {
-    if (X86_SINGLEREG(r_dest.GetReg())) {
-      if (X86_SINGLEREG(r_src.GetReg())) {
+    if (r_dest.IsSingle()) {
+      if (r_src.IsSingle()) {
         opcode = kX86MovssRR;
       } else {  // Fpr <- Gpr
         opcode = kX86MovdxrRR;
       }
     } else {  // Gpr <- Fpr
-      DCHECK(X86_SINGLEREG(r_src.GetReg()));
+      DCHECK(r_src.IsSingle()) << "Raw: 0x" << std::hex << r_src.GetRawBits();
       opcode = kX86MovdrxRR;
     }
   }
@@ -76,11 +77,10 @@
  */
 LIR* X86Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
   RegStorage r_dest_save = r_dest;
-  if (X86_FPREG(r_dest.GetReg())) {
+  if (r_dest.IsFloat()) {
     if (value == 0) {
       return NewLIR2(kX86XorpsRR, r_dest.GetReg(), r_dest.GetReg());
     }
-    DCHECK(X86_SINGLEREG(r_dest.GetReg()));
     r_dest = AllocTemp();
   }
 
@@ -92,7 +92,7 @@
     res = NewLIR2(kX86Mov32RI, r_dest.GetReg(), value);
   }
 
-  if (X86_FPREG(r_dest_save.GetReg())) {
+  if (r_dest_save.IsFloat()) {
     NewLIR2(kX86MovdxrRR, r_dest_save.GetReg(), r_dest.GetReg());
     FreeTemp(r_dest);
   }
@@ -129,33 +129,59 @@
 LIR* X86Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
   X86OpCode opcode = kX86Bkpt;
   bool byte_imm = IS_SIMM8(value);
-  DCHECK(!X86_FPREG(r_dest_src1.GetReg()));
-  switch (op) {
-    case kOpLsl: opcode = kX86Sal32RI; break;
-    case kOpLsr: opcode = kX86Shr32RI; break;
-    case kOpAsr: opcode = kX86Sar32RI; break;
-    case kOpAdd: opcode = byte_imm ? kX86Add32RI8 : kX86Add32RI; break;
-    case kOpOr:  opcode = byte_imm ? kX86Or32RI8  : kX86Or32RI;  break;
-    case kOpAdc: opcode = byte_imm ? kX86Adc32RI8 : kX86Adc32RI; break;
-    // case kOpSbb: opcode = kX86Sbb32RI; break;
-    case kOpAnd: opcode = byte_imm ? kX86And32RI8 : kX86And32RI; break;
-    case kOpSub: opcode = byte_imm ? kX86Sub32RI8 : kX86Sub32RI; break;
-    case kOpXor: opcode = byte_imm ? kX86Xor32RI8 : kX86Xor32RI; break;
-    case kOpCmp: opcode = byte_imm ? kX86Cmp32RI8 : kX86Cmp32RI; break;
-    case kOpMov:
-      /*
-       * Moving the constant zero into register can be specialized as an xor of the register.
-       * However, that sets eflags while the move does not. For that reason here, always do
-       * the move and if caller is flexible, they should be calling LoadConstantNoClobber instead.
-       */
-      opcode = kX86Mov32RI;
-      break;
-    case kOpMul:
-      opcode = byte_imm ? kX86Imul32RRI8 : kX86Imul32RRI;
-      return NewLIR3(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), value);
-    default:
-      LOG(FATAL) << "Bad case in OpRegImm " << op;
+  DCHECK(!r_dest_src1.IsFloat());
+  if (r_dest_src1.Is64Bit()) {
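+    // Only 64-bit add and sub immediates are supported here; other ops hit the FATAL below.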
+    switch (op) {
+      case kOpAdd: opcode = byte_imm ? kX86Add64RI8 : kX86Add64RI; break;
+      case kOpSub: opcode = byte_imm ? kX86Sub64RI8 : kX86Sub64RI; break;
+      default:
+        LOG(FATAL) << "Bad case in OpRegImm (64-bit) " << op;
+    }
+  } else {
+    switch (op) {
+      case kOpLsl: opcode = kX86Sal32RI; break;
+      case kOpLsr: opcode = kX86Shr32RI; break;
+      case kOpAsr: opcode = kX86Sar32RI; break;
+      case kOpAdd: opcode = byte_imm ? kX86Add32RI8 : kX86Add32RI; break;
+      case kOpOr:  opcode = byte_imm ? kX86Or32RI8  : kX86Or32RI;  break;
+      case kOpAdc: opcode = byte_imm ? kX86Adc32RI8 : kX86Adc32RI; break;
+      // case kOpSbb: opcode = kX86Sbb32RI; break;
+      case kOpAnd: opcode = byte_imm ? kX86And32RI8 : kX86And32RI; break;
+      case kOpSub: opcode = byte_imm ? kX86Sub32RI8 : kX86Sub32RI; break;
+      case kOpXor: opcode = byte_imm ? kX86Xor32RI8 : kX86Xor32RI; break;
+      case kOpCmp: opcode = byte_imm ? kX86Cmp32RI8 : kX86Cmp32RI; break;
+      case kOpMov:
+        /*
+         * Moving the constant zero into register can be specialized as an xor of the register.
+         * However, that sets eflags while the move does not. For that reason here, always do
+         * the move and if caller is flexible, they should be calling LoadConstantNoClobber instead.
+         */
+        opcode = kX86Mov32RI;
+        break;
+      case kOpMul:
+        opcode = byte_imm ? kX86Imul32RRI8 : kX86Imul32RRI;
+        return NewLIR3(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), value);
+      case kOp2Byte:
+        opcode = kX86Mov32RI;
+        value = static_cast<int8_t>(value);
+        break;
+      case kOp2Short:
+        opcode = kX86Mov32RI;
+        value = static_cast<int16_t>(value);
+        break;
+      case kOp2Char:
+        opcode = kX86Mov32RI;
+        value = static_cast<uint16_t>(value);
+        break;
+      case kOpNeg:
+        opcode = kX86Mov32RI;
+        value = -value;
+        break;
+      default:
+        LOG(FATAL) << "Bad case in OpRegImm " << op;
+    }
   }
+  CHECK(!r_dest_src1.Is64Bit() || X86Mir2Lir::EncodingMap[opcode].kind == kReg64Imm) << "OpRegImm(" << op << ")";
   return NewLIR2(opcode, r_dest_src1.GetReg(), value);
 }
 
@@ -191,8 +217,10 @@
       case kOpOr:  opcode = kX86Or32RR; break;
       case kOpXor: opcode = kX86Xor32RR; break;
       case kOp2Byte:
+        // TODO: there are several instances of this check.  A utility function perhaps?
+        // TODO: Similar to Arm's reg < 8 check.  Perhaps add attribute checks to RegStorage?
         // Use shifts instead of a byte operand if the source can't be byte accessed.
-        if (r_src2.GetReg() >= 4) {
+        if (r_src2.GetRegNum() >= rs_rX86_SP.GetRegNum()) {
           NewLIR2(kX86Mov32RR, r_dest_src1.GetReg(), r_src2.GetReg());
           NewLIR2(kX86Sal32RI, r_dest_src1.GetReg(), 24);
           return NewLIR2(kX86Sar32RI, r_dest_src1.GetReg(), 24);
@@ -207,49 +235,49 @@
         LOG(FATAL) << "Bad case in OpRegReg " << op;
         break;
     }
-    CHECK(!src2_must_be_cx || r_src2.GetReg() == rCX);
+    CHECK(!src2_must_be_cx || r_src2.GetReg() == rs_rCX.GetReg());
     return NewLIR2(opcode, r_dest_src1.GetReg(), r_src2.GetReg());
 }
 
 LIR* X86Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
-  DCHECK(!(X86_FPREG(r_base.GetReg())));
+  DCHECK(!r_base.IsFloat());
   X86OpCode opcode = kX86Nop;
   int dest = r_dest.IsPair() ? r_dest.GetLowReg() : r_dest.GetReg();
   switch (move_type) {
     case kMov8GP:
-      CHECK(!X86_FPREG(dest));
+      CHECK(!r_dest.IsFloat());
       opcode = kX86Mov8RM;
       break;
     case kMov16GP:
-      CHECK(!X86_FPREG(dest));
+      CHECK(!r_dest.IsFloat());
       opcode = kX86Mov16RM;
       break;
     case kMov32GP:
-      CHECK(!X86_FPREG(dest));
+      CHECK(!r_dest.IsFloat());
       opcode = kX86Mov32RM;
       break;
     case kMov32FP:
-      CHECK(X86_FPREG(dest));
+      CHECK(r_dest.IsFloat());
       opcode = kX86MovssRM;
       break;
     case kMov64FP:
-      CHECK(X86_FPREG(dest));
+      CHECK(r_dest.IsFloat());
       opcode = kX86MovsdRM;
       break;
     case kMovU128FP:
-      CHECK(X86_FPREG(dest));
+      CHECK(r_dest.IsFloat());
       opcode = kX86MovupsRM;
       break;
     case kMovA128FP:
-      CHECK(X86_FPREG(dest));
+      CHECK(r_dest.IsFloat());
       opcode = kX86MovapsRM;
       break;
     case kMovLo128FP:
-      CHECK(X86_FPREG(dest));
+      CHECK(r_dest.IsFloat());
       opcode = kX86MovlpsRM;
       break;
     case kMovHi128FP:
-      CHECK(X86_FPREG(dest));
+      CHECK(r_dest.IsFloat());
       opcode = kX86MovhpsRM;
       break;
     case kMov64GP:
@@ -264,45 +292,45 @@
 }
 
 LIR* X86Mir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) {
-  DCHECK(!(X86_FPREG(r_base.GetReg())));
+  DCHECK(!r_base.IsFloat());
   int src = r_src.IsPair() ? r_src.GetLowReg() : r_src.GetReg();
 
   X86OpCode opcode = kX86Nop;
   switch (move_type) {
     case kMov8GP:
-      CHECK(!X86_FPREG(src));
+      CHECK(!r_src.IsFloat());
       opcode = kX86Mov8MR;
       break;
     case kMov16GP:
-      CHECK(!X86_FPREG(src));
+      CHECK(!r_src.IsFloat());
       opcode = kX86Mov16MR;
       break;
     case kMov32GP:
-      CHECK(!X86_FPREG(src));
+      CHECK(!r_src.IsFloat());
       opcode = kX86Mov32MR;
       break;
     case kMov32FP:
-      CHECK(X86_FPREG(src));
+      CHECK(r_src.IsFloat());
       opcode = kX86MovssMR;
       break;
     case kMov64FP:
-      CHECK(X86_FPREG(src));
+      CHECK(r_src.IsFloat());
       opcode = kX86MovsdMR;
       break;
     case kMovU128FP:
-      CHECK(X86_FPREG(src));
+      CHECK(r_src.IsFloat());
       opcode = kX86MovupsMR;
       break;
     case kMovA128FP:
-      CHECK(X86_FPREG(src));
+      CHECK(r_src.IsFloat());
       opcode = kX86MovapsMR;
       break;
     case kMovLo128FP:
-      CHECK(X86_FPREG(src));
+      CHECK(r_src.IsFloat());
       opcode = kX86MovlpsMR;
       break;
     case kMovHi128FP:
-      CHECK(X86_FPREG(src));
+      CHECK(r_src.IsFloat());
       opcode = kX86MovhpsMR;
       break;
     case kMov64GP:
@@ -367,7 +395,7 @@
       LOG(FATAL) << "Bad case in OpMemReg " << op;
       break;
   }
-  LIR *l = NewLIR3(opcode, rX86_SP, displacement, r_value);
+  LIR *l = NewLIR3(opcode, rs_rX86_SP.GetReg(), displacement, r_value);
   AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, false /* is_64bit */);
   AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, false /* is_64bit */);
   return l;
@@ -390,7 +418,7 @@
       LOG(FATAL) << "Bad case in OpRegMem " << op;
       break;
   }
-  LIR *l = NewLIR3(opcode, r_dest.GetReg(), rX86_SP, displacement);
+  LIR *l = NewLIR3(opcode, r_dest.GetReg(), rs_rX86_SP.GetReg(), displacement);
   AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, false /* is_64bit */);
   return l;
 }
@@ -449,7 +477,7 @@
     X86OpCode opcode = IS_SIMM8(value) ? kX86Imul32RRI8 : kX86Imul32RRI;
     return NewLIR3(opcode, r_dest.GetReg(), r_src.GetReg(), value);
   } else if (op == kOpAnd) {
-    if (value == 0xFF && r_src.GetReg() < 4) {
+    if (value == 0xFF && r_src.Low4()) {
       return NewLIR2(kX86Movzx8RR, r_dest.GetReg(), r_src.GetReg());
     } else if (value == 0xFFFF) {
       return NewLIR2(kX86Movzx16RR, r_dest.GetReg(), r_src.GetReg());
@@ -462,7 +490,7 @@
                      r_src.GetReg() /* index */, value /* scale */, 0 /* disp */);
     } else if (op == kOpAdd) {  // lea add special case
       return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src.GetReg() /* base */,
-                     r4sib_no_index /* index */, 0 /* scale */, value /* disp */);
+                     rs_rX86_SP.GetReg()/*r4sib_no_index*/ /* index */, 0 /* scale */, value /* disp */);
     }
     OpRegCopy(r_dest, r_src);
   }
@@ -470,6 +498,20 @@
 }
 
 LIR* X86Mir2Lir::OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) {
+  DCHECK_EQ(kX86, cu_->instruction_set);
+  X86OpCode opcode = kX86Bkpt;
+  switch (op) {
+    case kOpBlx: opcode = kX86CallT;  break;
+    case kOpBx: opcode = kX86JmpT;  break;
+    default:
+      LOG(FATAL) << "Bad opcode: " << op;
+      break;
+  }
+  return NewLIR1(opcode, thread_offset.Int32Value());
+}
+
+LIR* X86Mir2Lir::OpThreadMem(OpKind op, ThreadOffset<8> thread_offset) {
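+  // 64-bit thread-offset variant, used when compiling for x86-64 (see the DCHECK below).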
+  DCHECK_EQ(kX86_64, cu_->instruction_set);
   X86OpCode opcode = kX86Bkpt;
   switch (op) {
     case kOpBlx: opcode = kX86CallT;  break;
@@ -497,7 +539,7 @@
     int32_t val_hi = High32Bits(value);
     int32_t low_reg_val = r_dest.IsPair() ? r_dest.GetLowReg() : r_dest.GetReg();
     LIR *res;
-    bool is_fp = X86_FPREG(low_reg_val);
+    bool is_fp = r_dest.IsFloat();
     // TODO: clean this up once we fully recognize 64-bit storage containers.
     if (is_fp) {
       if (value == 0) {
@@ -518,7 +560,7 @@
         // 4 byte offset.  We will fix this up in the assembler later to have the right
         // value.
         res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::Solo64(low_reg_val),
-                           kDouble, INVALID_SREG);
+                           kDouble);
         res->target = data_target;
         res->flags.fixup = kFixupLoad;
         SetMemRefType(res, true, kLiteral);
@@ -530,10 +572,9 @@
           res = LoadConstantNoClobber(RegStorage::Solo32(low_reg_val), val_lo);
         }
         if (val_hi != 0) {
-          // FIXME: clean up when AllocTempDouble no longer returns a pair.
           RegStorage r_dest_hi = AllocTempDouble();
-          LoadConstantNoClobber(RegStorage::Solo32(r_dest_hi.GetLowReg()), val_hi);
-          NewLIR2(kX86PunpckldqRR, low_reg_val, r_dest_hi.GetLowReg());
+          LoadConstantNoClobber(r_dest_hi, val_hi);
+          NewLIR2(kX86PunpckldqRR, low_reg_val, r_dest_hi.GetReg());
           FreeTemp(r_dest_hi);
         }
       }
@@ -544,37 +585,39 @@
     return res;
 }
 
-// FIXME: don't split r_dest into two storage units.
 LIR* X86Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                     int displacement, RegStorage r_dest, RegStorage r_dest_hi,
-                                     OpSize size, int s_reg) {
+                                     int displacement, RegStorage r_dest, OpSize size) {
   LIR *load = NULL;
   LIR *load2 = NULL;
   bool is_array = r_index.Valid();
-  bool pair = false;
-  bool is64bit = false;
+  bool pair = r_dest.IsPair();
+  bool is64bit = ((size == k64) || (size == kDouble));
   X86OpCode opcode = kX86Nop;
   switch (size) {
     case k64:
     case kDouble:
-      // TODO: use regstorage attributes here.
-      is64bit = true;
-      if (X86_FPREG(r_dest.GetReg())) {
+      if (r_dest.IsFloat()) {
         opcode = is_array ? kX86MovsdRA : kX86MovsdRM;
       } else {
-        pair = true;
         opcode = is_array ? kX86Mov32RA  : kX86Mov32RM;
       }
       // TODO: double store is to unaligned address
       DCHECK_EQ((displacement & 0x3), 0);
       break;
+    case kWord:
+      if (Gen64Bit()) {
+        opcode = is_array ? kX86Mov64RA  : kX86Mov64RM;
+        CHECK_EQ(is_array, false);
+        CHECK_EQ(r_dest.IsFloat(), false);
+        break;
+      }  // else fall-through to k32 case
     case k32:
     case kSingle:
     case kReference:  // TODO: update for reference decompression on 64-bit targets.
       opcode = is_array ? kX86Mov32RA : kX86Mov32RM;
-      if (X86_FPREG(r_dest.GetReg())) {
+      if (r_dest.IsFloat()) {
         opcode = is_array ? kX86MovssRA : kX86MovssRM;
-        DCHECK(X86_SINGLEREG(r_dest.GetReg()));
+        DCHECK(r_dest.IsFloat());
       }
       DCHECK_EQ((displacement & 0x3), 0);
       break;
@@ -600,13 +643,14 @@
     if (!pair) {
       load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
     } else {
-      if (r_base == r_dest) {
-        load2 = NewLIR3(opcode, r_dest_hi.GetReg(), r_base.GetReg(),
+      DCHECK(!r_dest.IsFloat());  // Make sure we're not still using a pair here.
+      if (r_base == r_dest.GetLow()) {
+        load2 = NewLIR3(opcode, r_dest.GetHighReg(), r_base.GetReg(),
                         displacement + HIWORD_OFFSET);
-        load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
+        load = NewLIR3(opcode, r_dest.GetLowReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
       } else {
-        load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
-        load2 = NewLIR3(opcode, r_dest_hi.GetReg(), r_base.GetReg(),
+        load = NewLIR3(opcode, r_dest.GetLowReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
+        load2 = NewLIR3(opcode, r_dest.GetHighReg(), r_base.GetReg(),
                         displacement + HIWORD_OFFSET);
       }
     }
@@ -623,36 +667,37 @@
       load = NewLIR5(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
                      displacement + LOWORD_OFFSET);
     } else {
-      if (r_base == r_dest) {
-        if (r_dest_hi == r_index) {
+      DCHECK(!r_dest.IsFloat());  // Make sure we're not still using a pair here.
+      if (r_base == r_dest.GetLow()) {
+        if (r_dest.GetHigh() == r_index) {
           // We can't use either register for the first load.
           RegStorage temp = AllocTemp();
           load2 = NewLIR5(opcode, temp.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
                           displacement + HIWORD_OFFSET);
-          load = NewLIR5(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
                          displacement + LOWORD_OFFSET);
-          OpRegCopy(r_dest_hi, temp);
+          OpRegCopy(r_dest.GetHigh(), temp);
           FreeTemp(temp);
         } else {
-          load2 = NewLIR5(opcode, r_dest_hi.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load2 = NewLIR5(opcode, r_dest.GetHighReg(), r_base.GetReg(), r_index.GetReg(), scale,
                           displacement + HIWORD_OFFSET);
-          load = NewLIR5(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
                          displacement + LOWORD_OFFSET);
         }
       } else {
-        if (r_dest == r_index) {
+        if (r_dest.GetLow() == r_index) {
           // We can't use either register for the first load.
           RegStorage temp = AllocTemp();
           load = NewLIR5(opcode, temp.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
                          displacement + LOWORD_OFFSET);
-          load2 = NewLIR5(opcode, r_dest_hi.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load2 = NewLIR5(opcode, r_dest.GetHighReg(), r_base.GetReg(), r_index.GetReg(), scale,
                           displacement + HIWORD_OFFSET);
-          OpRegCopy(r_dest, temp);
+          OpRegCopy(r_dest.GetLow(), temp);
           FreeTemp(temp);
         } else {
-          load = NewLIR5(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
                          displacement + LOWORD_OFFSET);
-          load2 = NewLIR5(opcode, r_dest_hi.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load2 = NewLIR5(opcode, r_dest.GetHighReg(), r_base.GetReg(), r_index.GetReg(), scale,
                           displacement + HIWORD_OFFSET);
         }
       }
@@ -665,56 +710,60 @@
 /* Load value from base + scaled index. */
 LIR* X86Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
                                  int scale, OpSize size) {
-  return LoadBaseIndexedDisp(r_base, r_index, scale, 0,
-                             r_dest, RegStorage::InvalidReg(), size, INVALID_SREG);
+  return LoadBaseIndexedDisp(r_base, r_index, scale, 0, r_dest, size);
 }
 
-LIR* X86Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement,
-                  RegStorage r_dest, OpSize size, int s_reg) {
-  // TODO: base this on target.
-  if (size == kWord) {
-    size = k32;
-  }
-  return LoadBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement,
-                             r_dest, RegStorage::InvalidReg(), size, s_reg);
+LIR* X86Mir2Lir::LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest,
+                                      OpSize size) {
+  // LoadBaseDisp() will emit correct insn for atomic load on x86
+  // assuming r_dest is correctly prepared using RegClassForFieldLoadStore().
+  return LoadBaseDisp(r_base, displacement, r_dest, size);
 }
 
-LIR* X86Mir2Lir::LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest,
-                                  int s_reg) {
-  return LoadBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement,
-                             r_dest.GetLow(), r_dest.GetHigh(), k64, s_reg);
+LIR* X86Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+                              OpSize size) {
+  return LoadBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_dest,
+                             size);
 }
 
 LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
-                                      int displacement, RegStorage r_src, RegStorage r_src_hi,
-                                      OpSize size, int s_reg) {
+                                      int displacement, RegStorage r_src, OpSize size) {
   LIR *store = NULL;
   LIR *store2 = NULL;
   bool is_array = r_index.Valid();
-  // FIXME: use regstorage attributes in place of these.
-  bool pair = false;
-  bool is64bit = false;
+  bool pair = r_src.IsPair();
+  bool is64bit = (size == k64) || (size == kDouble);
   X86OpCode opcode = kX86Nop;
   switch (size) {
     case k64:
     case kDouble:
-      is64bit = true;
-      if (X86_FPREG(r_src.GetReg())) {
+      if (r_src.IsFloat()) {
         opcode = is_array ? kX86MovsdAR : kX86MovsdMR;
       } else {
-        pair = true;
-        opcode = is_array ? kX86Mov32AR  : kX86Mov32MR;
+        if (Gen64Bit()) {
+          opcode = is_array ? kX86Mov64AR  : kX86Mov64MR;
+        } else {
+          // TODO(64): pair = true;
+          opcode = is_array ? kX86Mov32AR  : kX86Mov32MR;
+        }
       }
       // TODO: double store is to unaligned address
       DCHECK_EQ((displacement & 0x3), 0);
       break;
+    case kWord:
+      if (Gen64Bit()) {
+        opcode = is_array ? kX86Mov64AR  : kX86Mov64MR;
+        CHECK_EQ(is_array, false);
+        CHECK_EQ(r_src.IsFloat(), false);
+        break;
+      }  // else fall-through to k32 case
     case k32:
     case kSingle:
     case kReference:
       opcode = is_array ? kX86Mov32AR : kX86Mov32MR;
-      if (X86_FPREG(r_src.GetReg())) {
+      if (r_src.IsFloat()) {
         opcode = is_array ? kX86MovssAR : kX86MovssMR;
-        DCHECK(X86_SINGLEREG(r_src.GetReg()));
+        DCHECK(r_src.IsSingle());
       }
       DCHECK_EQ((displacement & 0x3), 0);
       break;
@@ -735,8 +784,9 @@
     if (!pair) {
       store = NewLIR3(opcode, r_base.GetReg(), displacement + LOWORD_OFFSET, r_src.GetReg());
     } else {
-      store = NewLIR3(opcode, r_base.GetReg(), displacement + LOWORD_OFFSET, r_src.GetReg());
-      store2 = NewLIR3(opcode, r_base.GetReg(), displacement + HIWORD_OFFSET, r_src_hi.GetReg());
+      DCHECK(!r_src.IsFloat());  // Make sure we're not still using a pair here.
+      store = NewLIR3(opcode, r_base.GetReg(), displacement + LOWORD_OFFSET, r_src.GetLowReg());
+      store2 = NewLIR3(opcode, r_base.GetReg(), displacement + HIWORD_OFFSET, r_src.GetHighReg());
     }
     if (r_base == rs_rX86_SP) {
       AnnotateDalvikRegAccess(store, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2,
@@ -751,48 +801,32 @@
       store = NewLIR5(opcode, r_base.GetReg(), r_index.GetReg(), scale,
                       displacement + LOWORD_OFFSET, r_src.GetReg());
     } else {
+      DCHECK(!r_src.IsFloat());  // Make sure we're not still using a pair here.
       store = NewLIR5(opcode, r_base.GetReg(), r_index.GetReg(), scale,
-                      displacement + LOWORD_OFFSET, r_src.GetReg());
+                      displacement + LOWORD_OFFSET, r_src.GetLowReg());
       store2 = NewLIR5(opcode, r_base.GetReg(), r_index.GetReg(), scale,
-                       displacement + HIWORD_OFFSET, r_src_hi.GetReg());
+                       displacement + HIWORD_OFFSET, r_src.GetHighReg());
     }
   }
-
   return store;
 }
 
 /* Store value to base + scaled index. */
 LIR* X86Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                       int scale, OpSize size) {
-  return StoreBaseIndexedDisp(r_base, r_index, scale, 0,
-                              r_src, RegStorage::InvalidReg(), size, INVALID_SREG);
+  return StoreBaseIndexedDisp(r_base, r_index, scale, 0, r_src, size);
+}
+
+LIR* X86Mir2Lir::StoreBaseDispVolatile(RegStorage r_base, int displacement,
+                                       RegStorage r_src, OpSize size) {
+  // StoreBaseDisp() will emit correct insn for atomic store on x86
+  // assuming r_src is correctly prepared using RegClassForFieldLoadStore().
+  return StoreBaseDisp(r_base, displacement, r_src, size);
 }
 
 LIR* X86Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement,
                                RegStorage r_src, OpSize size) {
-  // TODO: base this on target.
-  if (size == kWord) {
-    size = k32;
-  }
-  return StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_src,
-                              RegStorage::InvalidReg(), size, INVALID_SREG);
-}
-
-LIR* X86Mir2Lir::StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src) {
-  return StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement,
-                              r_src.GetLow(), r_src.GetHigh(), k64, INVALID_SREG);
-}
-
-/*
- * Copy a long value in Core registers to an XMM register
- *
- */
-void X86Mir2Lir::OpVectorRegCopyWide(uint8_t fp_reg, uint8_t low_reg, uint8_t high_reg) {
-  NewLIR2(kX86MovdxrRR, fp_reg, low_reg);
-  int tmp_reg = AllocTempDouble().GetLowReg();
-  NewLIR2(kX86MovdxrRR, tmp_reg, high_reg);
-  NewLIR2(kX86PunpckldqRR, fp_reg, tmp_reg);
-  FreeTemp(tmp_reg);
+  return StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_src, size);
 }
 
 LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
@@ -848,6 +882,9 @@
     case kMirOpFusedCmpgDouble:
       AnalyzeFPInstruction(opcode, bb, mir);
       break;
+    case kMirOpConstVector:
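+      // Vector constants are loaded from a literal pool addressed off the stored method address.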
+      store_method_addr_ = true;
+      break;
     default:
       // Ignore the rest.
       break;
@@ -889,7 +926,7 @@
 
 void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock * bb, MIR *mir) {
   // Look at all the uses, and see if they are double constants.
-  uint64_t attrs = mir_graph_->oat_data_flow_attributes_[opcode];
+  uint64_t attrs = MIRGraph::GetDataFlowAttributes(static_cast<Instruction::Code>(opcode));
   int next_sreg = 0;
   if (attrs & DF_UA) {
     if (attrs & DF_A_WIDE) {
@@ -921,4 +958,30 @@
   }
 }
 
+RegLocation X86Mir2Lir::UpdateLocTyped(RegLocation loc, int reg_class) {
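+  // Like UpdateLoc(), but if the value is in a temp register of the wrong class
+  // (core vs. float) for reg_class, evict it back to the Dalvik frame so it will be
+  // reloaded into the requested register class.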
+  loc = UpdateLoc(loc);
+  if ((loc.location == kLocPhysReg) && (loc.fp != loc.reg.IsFloat())) {
+    if (GetRegInfo(loc.reg)->IsTemp()) {
+      Clobber(loc.reg);
+      FreeTemp(loc.reg);
+      loc.reg = RegStorage::InvalidReg();
+      loc.location = kLocDalvikFrame;
+    }
+  }
+  return loc;
+}
+
+RegLocation X86Mir2Lir::UpdateLocWideTyped(RegLocation loc, int reg_class) {
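+  // Wide (64-bit) counterpart of UpdateLocTyped().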
+  loc = UpdateLocWide(loc);
+  if ((loc.location == kLocPhysReg) && (loc.fp != loc.reg.IsFloat())) {
+    if (GetRegInfo(loc.reg)->IsTemp()) {
+      Clobber(loc.reg);
+      FreeTemp(loc.reg);
+      loc.reg = RegStorage::InvalidReg();
+      loc.location = kLocDalvikFrame;
+    }
+  }
+  return loc;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 1759cbe..adfed0c 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -102,27 +102,6 @@
  * +========================+
  */
 
-// Offset to distingish FP regs.
-#define X86_FP_REG_OFFSET 32
-// Offset to distinguish DP FP regs.
-#define X86_FP_DOUBLE (X86_FP_REG_OFFSET + 16)
-// Reg types.
-#define X86_REGTYPE(x) (x & (X86_FP_REG_OFFSET | X86_FP_DOUBLE))
-#define X86_FPREG(x) ((x & X86_FP_REG_OFFSET) == X86_FP_REG_OFFSET)
-#define X86_DOUBLEREG(x) ((x & X86_FP_DOUBLE) == X86_FP_DOUBLE)
-#define X86_SINGLEREG(x) (X86_FPREG(x) && !X86_DOUBLEREG(x))
-
-/*
- * Note: the low register of a floating point pair is sufficient to
- * create the name of a double, but require both names to be passed to
- * allow for asserts to verify that the pair is consecutive if significant
- * rework is done in this area.  Also, it is a good reminder in the calling
- * code that reg locations always describe doubles as a pair of singles.
- */
-#define X86_S2D(x, y) ((x) | X86_FP_DOUBLE)
-/* Mask to strip off fp flags */
-#define X86_FP_REG_MASK 0xF
-
 enum X86ResourceEncodingPos {
   kX86GPReg0   = 0,
   kX86RegSP    = 4,
@@ -135,118 +114,161 @@
 #define ENCODE_X86_REG_SP           (1ULL << kX86RegSP)
 #define ENCODE_X86_FP_STACK         (1ULL << kX86FPStack)
 
+// FIXME: for 64-bit, perhaps add an X86_64NativeRegisterPool enum?
 enum X86NativeRegisterPool {
-  r0     = 0,
-  rAX    = r0,
-  r1     = 1,
-  rCX    = r1,
-  r2     = 2,
-  rDX    = r2,
-  r3     = 3,
-  rBX    = r3,
-  r4sp   = 4,
-  rX86_SP    = r4sp,
-  r4sib_no_index = r4sp,
-  r5     = 5,
-  rBP    = r5,
-  r5sib_no_base = r5,
-  r6     = 6,
-  rSI    = r6,
-  r7     = 7,
-  rDI    = r7,
+  r0             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0,
+  rAX            = r0,
+  r1             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 1,
+  rCX            = r1,
+  r2             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 2,
+  rDX            = r2,
+  r3             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 3,
+  rBX            = r3,
+  r4sp_32        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 4,
+  rX86_SP_32     = r4sp_32,
+  r4sp_64        = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 4,
+  rX86_SP_64     = r4sp_64,
+  r5             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 5,
+  rBP            = r5,
+  r5sib_no_base  = r5,
+  r6             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 6,
+  rSI            = r6,
+  r7             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 7,
+  rDI            = r7,
 #ifndef TARGET_REX_SUPPORT
-  rRET   = 8,  // fake return address register for core spill mask.
+  // fake return address register for core spill mask.
+  rRET           = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8,
 #else
-  r8     = 8,
-  r9     = 9,
-  r10    = 10,
-  r11    = 11,
-  r12    = 12,
-  r13    = 13,
-  r14    = 14,
-  r15    = 15,
-  rRET   = 16,  // fake return address register for core spill mask.
+  r8             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8,
+  r9             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 9,
+  r10            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 10,
+  r11            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 11,
+  r12            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 12,
+  r13            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 13,
+  r14            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 14,
+  r15            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 15,
+  // fake return address register for core spill mask.
+  rRET           = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 16,
 #endif
-  fr0  =  0 + X86_FP_REG_OFFSET,
-  fr1  =  1 + X86_FP_REG_OFFSET,
-  fr2  =  2 + X86_FP_REG_OFFSET,
-  fr3  =  3 + X86_FP_REG_OFFSET,
-  fr4  =  4 + X86_FP_REG_OFFSET,
-  fr5  =  5 + X86_FP_REG_OFFSET,
-  fr6  =  6 + X86_FP_REG_OFFSET,
-  fr7  =  7 + X86_FP_REG_OFFSET,
-  fr8  =  8 + X86_FP_REG_OFFSET,
-  fr9  =  9 + X86_FP_REG_OFFSET,
-  fr10 = 10 + X86_FP_REG_OFFSET,
-  fr11 = 11 + X86_FP_REG_OFFSET,
-  fr12 = 12 + X86_FP_REG_OFFSET,
-  fr13 = 13 + X86_FP_REG_OFFSET,
-  fr14 = 14 + X86_FP_REG_OFFSET,
-  fr15 = 15 + X86_FP_REG_OFFSET,
+
+  // xmm registers, single precision view
+  fr0  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 0,
+  fr1  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 1,
+  fr2  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 2,
+  fr3  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 3,
+  fr4  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 4,
+  fr5  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 5,
+  fr6  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 6,
+  fr7  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 7,
+
+  // xmm registers, double precision aliases
+  dr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0,
+  dr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 1,
+  dr2  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 2,
+  dr3  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 3,
+  dr4  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 4,
+  dr5  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 5,
+  dr6  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6,
+  dr7  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7,
+
+  // xmm registers, quad precision aliases
+  qr0  = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 0,
+  qr1  = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 1,
+  qr2  = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 2,
+  qr3  = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 3,
+  qr4  = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 4,
+  qr5  = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 5,
+  qr6  = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 6,
+  qr7  = RegStorage::k128BitSolo | RegStorage::kFloatingPoint | 7,
+
+  // TODO: as needed, add 256, 512 and 1024-bit xmm views.
 };
 
-const RegStorage rs_r0(RegStorage::k32BitSolo, r0);
-const RegStorage rs_rAX = rs_r0;
-const RegStorage rs_r1(RegStorage::k32BitSolo, r1);
-const RegStorage rs_rCX = rs_r1;
-const RegStorage rs_r2(RegStorage::k32BitSolo, r2);
-const RegStorage rs_rDX = rs_r2;
-const RegStorage rs_r3(RegStorage::k32BitSolo, r3);
-const RegStorage rs_rBX = rs_r3;
-const RegStorage rs_r4sp(RegStorage::k32BitSolo, r4sp);
-const RegStorage rs_rX86_SP = rs_r4sp;
-const RegStorage rs_r5(RegStorage::k32BitSolo, r5);
-const RegStorage rs_rBP = rs_r5;
-const RegStorage rs_r6(RegStorage::k32BitSolo, r6);
-const RegStorage rs_rSI = rs_r6;
-const RegStorage rs_r7(RegStorage::k32BitSolo, r7);
-const RegStorage rs_rDI = rs_r7;
+constexpr RegStorage rs_r0(RegStorage::kValid | r0);
+constexpr RegStorage rs_rAX = rs_r0;
+constexpr RegStorage rs_r1(RegStorage::kValid | r1);
+constexpr RegStorage rs_rCX = rs_r1;
+constexpr RegStorage rs_r2(RegStorage::kValid | r2);
+constexpr RegStorage rs_rDX = rs_r2;
+constexpr RegStorage rs_r3(RegStorage::kValid | r3);
+constexpr RegStorage rs_rBX = rs_r3;
+constexpr RegStorage rs_rX86_SP_64(RegStorage::kValid | r4sp_64);
+constexpr RegStorage rs_rX86_SP_32(RegStorage::kValid | r4sp_32);
+extern RegStorage rs_rX86_SP;
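+// rs_rX86_SP is not a constant: the backend selects the 32-bit or 64-bit view above
+// (rs_rX86_SP_32 / rs_rX86_SP_64) for the target being compiled.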
+constexpr RegStorage rs_r5(RegStorage::kValid | r5);
+constexpr RegStorage rs_rBP = rs_r5;
+constexpr RegStorage rs_r6(RegStorage::kValid | r6);
+constexpr RegStorage rs_rSI = rs_r6;
+constexpr RegStorage rs_r7(RegStorage::kValid | r7);
+constexpr RegStorage rs_rDI = rs_r7;
+constexpr RegStorage rs_rRET(RegStorage::kValid | rRET);
 
-// TODO: elminate these #defines?
-#define rX86_ARG0 rAX
-#define rs_rX86_ARG0 rs_rAX
-#define rX86_ARG1 rCX
-#define rs_rX86_ARG1 rs_rCX
-#define rX86_ARG2 rDX
-#define rs_rX86_ARG2 rs_rDX
-#define rX86_ARG3 rBX
-#define rs_rX86_ARG3 rs_rBX
-#define rX86_FARG0 rAX
-#define rs_rX86_FARG0 rs_rAX
-#define rX86_FARG1 rCX
-#define rs_rX86_FARG1 rs_rCX
-#define rX86_FARG2 rDX
-#define rs_rX86_FARG2 rs_rDX
-#define rX86_FARG3 rBX
-#define rs_rX86_FARG3 rs_rBX
-#define rX86_RET0 rAX
-#define rs_rX86_RET0 rs_rAX
-#define rX86_RET1 rDX
-#define rs_rX86_RET1 rs_rDX
-#define rX86_INVOKE_TGT rAX
-#define rs_rX86_INVOKE_TGT rs_rAX
-#define rX86_LR RegStorage::kInvalidRegVal
-#define rX86_SUSPEND RegStorage::kInvalidRegVal
-#define rX86_SELF RegStorage::kInvalidRegVal
-#define rX86_COUNT rCX
-#define rs_rX86_COUNT rs_rCX
-#define rX86_PC RegStorage::kInvalidRegVal
+constexpr RegStorage rs_fr0(RegStorage::kValid | fr0);
+constexpr RegStorage rs_fr1(RegStorage::kValid | fr1);
+constexpr RegStorage rs_fr2(RegStorage::kValid | fr2);
+constexpr RegStorage rs_fr3(RegStorage::kValid | fr3);
+constexpr RegStorage rs_fr4(RegStorage::kValid | fr4);
+constexpr RegStorage rs_fr5(RegStorage::kValid | fr5);
+constexpr RegStorage rs_fr6(RegStorage::kValid | fr6);
+constexpr RegStorage rs_fr7(RegStorage::kValid | fr7);
+
+constexpr RegStorage rs_dr0(RegStorage::kValid | dr0);
+constexpr RegStorage rs_dr1(RegStorage::kValid | dr1);
+constexpr RegStorage rs_dr2(RegStorage::kValid | dr2);
+constexpr RegStorage rs_dr3(RegStorage::kValid | dr3);
+constexpr RegStorage rs_dr4(RegStorage::kValid | dr4);
+constexpr RegStorage rs_dr5(RegStorage::kValid | dr5);
+constexpr RegStorage rs_dr6(RegStorage::kValid | dr6);
+constexpr RegStorage rs_dr7(RegStorage::kValid | dr7);
+
+constexpr RegStorage rs_qr0(RegStorage::kValid | qr0);
+constexpr RegStorage rs_qr1(RegStorage::kValid | qr1);
+constexpr RegStorage rs_qr2(RegStorage::kValid | qr2);
+constexpr RegStorage rs_qr3(RegStorage::kValid | qr3);
+constexpr RegStorage rs_qr4(RegStorage::kValid | qr4);
+constexpr RegStorage rs_qr5(RegStorage::kValid | qr5);
+constexpr RegStorage rs_qr6(RegStorage::kValid | qr6);
+constexpr RegStorage rs_qr7(RegStorage::kValid | qr7);
+
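+// These argument/return register mappings are no longer compile-time constants; they are
+// defined in the backend so they can differ between x86 and x86-64.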
+extern X86NativeRegisterPool rX86_ARG0;
+extern X86NativeRegisterPool rX86_ARG1;
+extern X86NativeRegisterPool rX86_ARG2;
+extern X86NativeRegisterPool rX86_ARG3;
+extern X86NativeRegisterPool rX86_FARG0;
+extern X86NativeRegisterPool rX86_FARG1;
+extern X86NativeRegisterPool rX86_FARG2;
+extern X86NativeRegisterPool rX86_FARG3;
+extern X86NativeRegisterPool rX86_RET0;
+extern X86NativeRegisterPool rX86_RET1;
+extern X86NativeRegisterPool rX86_INVOKE_TGT;
+extern X86NativeRegisterPool rX86_COUNT;
+
+extern RegStorage rs_rX86_ARG0;
+extern RegStorage rs_rX86_ARG1;
+extern RegStorage rs_rX86_ARG2;
+extern RegStorage rs_rX86_ARG3;
+extern RegStorage rs_rX86_FARG0;
+extern RegStorage rs_rX86_FARG1;
+extern RegStorage rs_rX86_FARG2;
+extern RegStorage rs_rX86_FARG3;
+extern RegStorage rs_rX86_RET0;
+extern RegStorage rs_rX86_RET1;
+extern RegStorage rs_rX86_INVOKE_TGT;
+extern RegStorage rs_rX86_COUNT;
 
 // RegisterLocation templates return values (r_V0, or r_V0/r_V1).
 const RegLocation x86_loc_c_return
-    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
+    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1,
      RegStorage(RegStorage::k32BitSolo, rAX), INVALID_SREG, INVALID_SREG};
 const RegLocation x86_loc_c_return_wide
-    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
+    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
      RegStorage(RegStorage::k64BitPair, rAX, rDX), INVALID_SREG, INVALID_SREG};
-// TODO: update to use k32BitVector (must encode in 7 bits, including fp flag).
 const RegLocation x86_loc_c_return_float
-    {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, kVectorLength4,
+    {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1,
      RegStorage(RegStorage::k32BitSolo, fr0), INVALID_SREG, INVALID_SREG};
-// TODO: update to use k64BitVector (must encode in 7 bits, including fp flag).
 const RegLocation x86_loc_c_return_double
-    {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, kVectorLength8,
-     RegStorage(RegStorage::k64BitPair, fr0, fr0), INVALID_SREG, INVALID_SREG};
+    {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1,
+     RegStorage(RegStorage::k64BitSolo, dr0), INVALID_SREG, INVALID_SREG};
 
 /*
  * The following enum defines the list of supported X86 instructions by the
@@ -289,10 +311,10 @@
   opcode ## 16RR, opcode ## 16RM, opcode ## 16RA, opcode ## 16RT, \
   opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, opcode ## 16TI, \
   opcode ## 16RI8, opcode ## 16MI8, opcode ## 16AI8, opcode ## 16TI8, \
-  opcode ## 32MR, opcode ## 32AR, opcode ## 32TR,  \
-  opcode ## 32RR, opcode ## 32RM, opcode ## 32RA, opcode ## 32RT, \
-  opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, opcode ## 32TI, \
-  opcode ## 32RI8, opcode ## 32MI8, opcode ## 32AI8, opcode ## 32TI8
+  opcode ## 32MR, opcode ## 64MR, opcode ## 32AR, opcode ## 64AR, opcode ## 32TR,  \
+  opcode ## 32RR, opcode ## 32RM, opcode ## 64RM, opcode ## 32RA, opcode ## 64RA, opcode ## 32RT, opcode ## 64RT, \
+  opcode ## 32RI, opcode ## 64RI, opcode ## 32MI, opcode ## 32AI, opcode ## 32TI, \
+  opcode ## 32RI8, opcode ## 64RI8, opcode ## 32MI8, opcode ## 32AI8, opcode ## 32TI8
   BinaryOpCode(kX86Add),
   BinaryOpCode(kX86Or),
   BinaryOpCode(kX86Adc),
@@ -311,14 +333,18 @@
   kX86Mov16MR, kX86Mov16AR, kX86Mov16TR,
   kX86Mov16RR, kX86Mov16RM, kX86Mov16RA, kX86Mov16RT,
   kX86Mov16RI, kX86Mov16MI, kX86Mov16AI, kX86Mov16TI,
-  kX86Mov32MR, kX86Mov32AR, kX86Mov32TR,
-  kX86Mov32RR, kX86Mov32RM, kX86Mov32RA, kX86Mov32RT,
-  kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI,
+  kX86Mov32MR, kX86Mov64MR, kX86Mov32AR, kX86Mov64AR, kX86Mov32TR,
+  kX86Mov32RR, kX86Mov32RM, kX86Mov64RM, kX86Mov32RA, kX86Mov64RA, kX86Mov32RT, kX86Mov64RT,
+  kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI, kX86Mov64TI,
   kX86Lea32RM,
   kX86Lea32RA,
   // RRC - Register Register ConditionCode - cond_opcode reg1, reg2
   //             - lir operands - 0: reg1, 1: reg2, 2: CC
   kX86Cmov32RRC,
+  // RMC - Register Memory ConditionCode - cond_opcode reg1, [base + disp]
+  //             - lir operands - 0: reg1, 1: base, 2: disp 3: CC
+  kX86Cmov32RMC,
+
   // RC - Register CL - opcode reg, CL
   //          - lir operands - 0: reg, 1: CL
   // MC - Memory CL   - opcode [base + disp], CL
@@ -342,7 +368,9 @@
 #undef BinaryShiftOpcode
   kX86Cmc,
   kX86Shld32RRI,
+  kX86Shld32MRI,
   kX86Shrd32RRI,
+  kX86Shrd32MRI,
 #define UnaryOpcode(opcode, reg, mem, array) \
   opcode ## 8 ## reg, opcode ## 8 ## mem, opcode ## 8 ## array, \
   opcode ## 16 ## reg, opcode ## 16 ## mem, opcode ## 16 ## array, \
@@ -397,6 +425,8 @@
   kX86Fild64M,                  // push 64-bit integer on x87 stack
   kX86Fstp32M,                  // pop top x87 fp stack and do 32-bit store
   kX86Fstp64M,                  // pop top x87 fp stack and do 64-bit store
+  Binary0fOpCode(kX86Mova128),  // move 128 bits aligned
+  kX86Mova128MR, kX86Mova128AR,  // store 128 bit aligned from xmm1 to m128
   Binary0fOpCode(kX86Movups),   // load unaligned packed single FP values from xmm2/m128 to xmm1
   kX86MovupsMR, kX86MovupsAR,   // store unaligned packed single FP values from xmm1 to m128
   Binary0fOpCode(kX86Movaps),   // load aligned packed single FP values from xmm2/m128 to xmm1
@@ -449,19 +479,21 @@
   kNullary,                                // Opcode that takes no arguments.
   kPrefix2Nullary,                         // Opcode that takes no arguments, but 2 prefixes.
   kRegOpcode,                              // Shorter form of R instruction kind (opcode+rd)
-  kReg, kMem, kArray,                      // R, M and A instruction kinds.
-  kMemReg, kArrayReg, kThreadReg,          // MR, AR and TR instruction kinds.
-  kRegReg, kRegMem, kRegArray, kRegThread,  // RR, RM, RA and RT instruction kinds.
+  kReg, kReg64, kMem, kArray,              // R, R64, M and A instruction kinds.
+  kMemReg, kMemReg64, kArrayReg, kArrayReg64, kThreadReg,          // MR, MR64, AR, AR64 and TR instruction kinds.
+  kRegReg, kRegMem, kRegArray, kRegThread, kReg64Thread,  // RR, RM, RA, RT and R64T instruction kinds.
   kRegRegStore,                            // RR following the store modrm reg-reg encoding rather than the load.
-  kRegImm, kMemImm, kArrayImm, kThreadImm,  // RI, MI, AI and TI instruction kinds.
+  kRegImm, kReg64Imm, kMemImm, kArrayImm, kThreadImm,  // RI, R64I, MI, AI and TI instruction kinds.
   kRegRegImm, kRegMemImm, kRegArrayImm,    // RRI, RMI and RAI instruction kinds.
   kMovRegImm,                              // Shorter form move RI.
   kRegRegImmRev,                           // RRI with first reg in r/m
+  kMemRegImm,                              // MRI instruction kinds.
   kShiftRegImm, kShiftMemImm, kShiftArrayImm,  // Shift opcode with immediate.
   kShiftRegCl, kShiftMemCl, kShiftArrayCl,     // Shift opcode with register CL.
   kRegRegReg, kRegRegMem, kRegRegArray,    // RRR, RRM, RRA instruction kinds.
   kRegCond, kMemCond, kArrayCond,          // R, M, A instruction kinds following by a condition.
   kRegRegCond,                             // RR instruction kind followed by a condition.
+  kRegMemCond,                             // RM instruction kind followed by a condition.
   kJmp, kJcc, kCall,                       // Branch instruction kinds.
   kPcRel,                                  // Operation with displacement that is PC relative
   kMacro,                                  // An instruction composing multiple others
@@ -500,6 +532,11 @@
 
 // Segment override instruction prefix used for quick TLS access to Thread::Current().
 #define THREAD_PREFIX 0x64
+#define THREAD_PREFIX_GS 0x65
+
+// 64 Bit Operand Size
+#define REX_W 0x48
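+// (A REX prefix is 0x40 plus the W/R/X/B bits, so 0x48 is REX with only W set.)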
+// Extension of the ModR/M reg field
 
 #define IS_SIMM8(v) ((-128 <= (v)) && ((v) <= 127))
 #define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32767))
diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h
index 11bec99..df21343 100644
--- a/compiler/dex/reg_storage.h
+++ b/compiler/dex/reg_storage.h
@@ -21,77 +21,101 @@
 namespace art {
 
 /*
- * Representation of the physical register, register pair or vector holding a Dalvik value.
- * The basic configuration of the storage (i.e. solo reg, pair, vector) is common across all
- * targets, but the encoding of the actual storage element is target independent.
+ * 16-bit representation of the physical register container holding a Dalvik value.
+ * The encoding allows up to 64 physical elements per storage class, and supports eight
+ * register container shapes.
  *
- * The two most-significant bits describe the basic shape of the storage, while meaning of the
- * lower 14 bits depends on the shape:
+ * [V] [HHHHH] [SSS] [F] [LLLLLL]
  *
- *  [PW]
- *       P: 0 -> pair, 1 -> solo (or vector)
- *       W: 1 -> 64 bits, 0 -> 32 bits
+ * [LLLLLL]
+ *  Physical register number for the low or solo register.
+ *    0..63
  *
- *  [00] [xxxxxxxxxxxxxx]     Invalid (typically all zeros)
- *  [01] [HHHHHHH] [LLLLLLL]  64-bit storage, composed of 2 32-bit registers
- *  [10] [0] [xxxxxx] [RRRRRRR]  32-bit solo register
- *  [11] [0] [xxxxxx] [RRRRRRR]  64-bit solo register
- *  [10] [1] [xxxxxx] [VVVVVVV]  32-bit vector storage
- *  [11] [1] [xxxxxx] [VVVVVVV]  64-bit vector storage
+ * [F]
+ *  Describes the type of the [LLLLLL] register.
+ *    0: Core
+ *    1: Floating point
  *
- * x - don't care
- * L - low register number of a pair
- * H - high register number of a pair
- * R - register number of a solo reg
- * V - vector description
+ * [SSS]
+ *  Shape of the register container.
+ *    000: Invalid
+ *    001: 32-bit solo register
+ *    010: 64-bit solo register
+ *    011: 64-bit pair consisting of two 32-bit solo registers
+ *    100: 128-bit solo register
+ *    101: 256-bit solo register
+ *    110: 512-bit solo register
+ *    111: 1024-bit solo register
  *
- * Note that in all non-invalid cases, the low 7 bits must be sufficient to describe
- * whether the storage element is floating point (see IsFloatReg()).
+ * [HHHHH]
+ *  Physical register number of the high register (valid only for register pair).
+ *    0..31
  *
+ * [V]
+ *    0 -> Invalid
+ *    1 -> Valid
+ *
+ * Note that in all non-invalid cases, we can determine if the storage is floating point
+ * by testing the [F] bit (kFloatingPoint).  Note also that a register pair is effectively limited to a pair of
+ * physical register numbers in the 0..31 range.
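+ *
+ * For example, x86's single-precision view of xmm3 (fr3) is encoded as
+ * k32BitSolo | kFloatingPoint | 3: shape 001, F = 1, LLLLLL = 3, with the V bit
+ * OR-ed in when a valid RegStorage value is constructed from it.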
+ *
+ * On some target architectures, the same underlying physical register container can be given
+ * different views.  For example, Arm's 32-bit single-precision floating point registers
+ * s2 and s3 map to the low and high halves of double-precision d1.  Similarly, X86's xmm3
+ * vector register can be viewed as 32-bit, 64-bit, 128-bit, etc.  In these cases the use of
+ * one view will affect the other views.  The RegStorage class does not concern itself
+ * with potential aliasing.  That will be done using the associated RegisterInfo struct.
+ * Distinct RegStorage elements should be created for each view of a physical register
+ * container.  The management of the aliased physical elements will be handled via RegisterInfo
+ * records.
  */
 
 class RegStorage {
  public:
   enum RegStorageKind {
-    kInvalid     = 0x0000,
-    k64BitPair   = 0x4000,
-    k32BitSolo   = 0x8000,
-    k64BitSolo   = 0xc000,
-    k32BitVector = 0xa000,
-    k64BitVector = 0xe000,
-    kPairMask    = 0x8000,
-    kPair        = 0x0000,
-    kSizeMask    = 0x4000,
-    k64Bit       = 0x4000,
-    k32Bit       = 0x0000,
-    kVectorMask  = 0xa000,
-    kVector      = 0xa000,
-    kSolo        = 0x8000,
-    kShapeMask   = 0xc000,
-    kKindMask    = 0xe000
+    kValidMask     = 0x8000,
+    kValid         = 0x8000,
+    kInvalid       = 0x0000,
+    kShapeMask     = 0x0380,
+    k32BitSolo     = 0x0080,
+    k64BitSolo     = 0x0100,
+    k64BitPair     = 0x0180,
+    k128BitSolo    = 0x0200,
+    k256BitSolo    = 0x0280,
+    k512BitSolo    = 0x0300,
+    k1024BitSolo   = 0x0380,
+    k64BitMask     = 0x0300,
+    k64Bits        = 0x0100,
+    kShapeTypeMask = 0x03c0,
+    kFloatingPoint = 0x0040,
+    kCoreRegister  = 0x0000,
   };
 
-  static const uint16_t kRegValMask = 0x007f;
-  static const uint16_t kInvalidRegVal = 0x007f;
-  static const uint16_t kHighRegShift = 7;
-  static const uint16_t kHighRegMask = kRegValMask << kHighRegShift;
+  static const uint16_t kRegValMask  = 0x03ff;     // Num, type and shape.
+  static const uint16_t kRegTypeMask = 0x007f;     // Num and type.
+  static const uint16_t kRegNumMask  = 0x003f;     // Num only.
+  static const uint16_t kHighRegNumMask = 0x001f;  // 0..31 for high reg
+  static const uint16_t kMaxRegs     = kRegValMask + 1;
+  // TODO: deprecate use of kInvalidRegVal and speed up GetReg().  Rely on valid bit instead.
+  static const uint16_t kInvalidRegVal = 0x03ff;
+  static const uint16_t kHighRegShift = 10;
+  static const uint16_t kHighRegMask = (kHighRegNumMask << kHighRegShift);
 
+  // Reg is [F][LLLLLL]; any existing shape in reg is overridden by rs_kind.
   RegStorage(RegStorageKind rs_kind, int reg) {
-    DCHECK_NE(rs_kind & kShapeMask, kInvalid);
-    DCHECK_NE(rs_kind & kShapeMask, k64BitPair);
-    DCHECK_EQ(rs_kind & ~kKindMask, 0);
-    DCHECK_EQ(reg & ~kRegValMask, 0);
-    reg_ = rs_kind | reg;
+    DCHECK_NE(rs_kind, k64BitPair);
+    DCHECK_EQ(rs_kind & ~kShapeMask, 0);
+    reg_ = kValid | rs_kind | (reg & kRegTypeMask);
   }
   RegStorage(RegStorageKind rs_kind, int low_reg, int high_reg) {
     DCHECK_EQ(rs_kind, k64BitPair);
-    DCHECK_EQ(low_reg & ~kRegValMask, 0);
-    DCHECK_EQ(high_reg & ~kRegValMask, 0);
-    reg_ = rs_kind | (high_reg << kHighRegShift) | low_reg;
+    DCHECK_EQ(low_reg & kFloatingPoint, high_reg & kFloatingPoint);
+    DCHECK_LE(high_reg & kRegNumMask, kHighRegNumMask) << "High reg must be in 0..31";
+    reg_ = kValid | rs_kind | ((high_reg & kHighRegNumMask) << kHighRegShift) |
+        (low_reg & kRegTypeMask);
   }
-  explicit RegStorage(uint16_t val) : reg_(val) {}
+  constexpr explicit RegStorage(uint16_t val) : reg_(val) {}
   RegStorage() : reg_(kInvalid) {}
-  ~RegStorage() {}
 
   bool operator==(const RegStorage rhs) const {
     return (reg_ == rhs.GetRawBits());
@@ -102,73 +126,131 @@
   }
 
   bool Valid() const {
-    return ((reg_ & kShapeMask) != kInvalid);
+    return ((reg_ & kValidMask) == kValid);
   }
 
   bool Is32Bit() const {
-    return ((reg_ & kSizeMask) == k32Bit);
+    return ((reg_ & kShapeMask) == k32BitSolo);
   }
 
   bool Is64Bit() const {
-    return ((reg_ & kSizeMask) == k64Bit);
+    return ((reg_ & k64BitMask) == k64Bits);
+  }
+
+  bool Is64BitSolo() const {
+    return ((reg_ & kShapeMask) == k64BitSolo);
   }
 
   bool IsPair() const {
-    return ((reg_ & kPairMask) == kPair);
+    return ((reg_ & kShapeMask) == k64BitPair);
   }
 
-  bool IsSolo() const {
-    return ((reg_ & kVectorMask) == kSolo);
+  bool IsFloat() const {
+    DCHECK(Valid());
+    return ((reg_ & kFloatingPoint) == kFloatingPoint);
   }
 
-  bool IsVector() const {
-    return ((reg_ & kVectorMask) == kVector);
+  bool IsDouble() const {
+    DCHECK(Valid());
+    return (reg_ & (kFloatingPoint | k64BitMask)) == (kFloatingPoint | k64Bits);
+  }
+
+  bool IsSingle() const {
+    DCHECK(Valid());
+    return (reg_ & (kFloatingPoint | k64BitMask)) == kFloatingPoint;
+  }
+
+  static bool IsFloat(uint16_t reg) {
+    return ((reg & kFloatingPoint) == kFloatingPoint);
+  }
+
+  static bool IsDouble(uint16_t reg) {
+    return (reg & (kFloatingPoint | k64BitMask)) == (kFloatingPoint | k64Bits);
+  }
+
+  static bool IsSingle(uint16_t reg) {
+    return (reg & (kFloatingPoint | k64BitMask)) == kFloatingPoint;
   }
 
   // Used to retrieve either the low register of a pair, or the only register.
   int GetReg() const {
-    DCHECK(!IsPair());
+    DCHECK(!IsPair()) << "reg_ = 0x" << std::hex << reg_;
     return Valid() ? (reg_ & kRegValMask) : kInvalidRegVal;
   }
 
+  // Sets shape, type and num of solo.
   void SetReg(int reg) {
     DCHECK(Valid());
+    DCHECK(!IsPair());
     reg_ = (reg_ & ~kRegValMask) | reg;
   }
 
+  // Set the reg number and type only; the target remains a 64-bit pair.
   void SetLowReg(int reg) {
     DCHECK(IsPair());
-    reg_ = (reg_ & ~kRegValMask) | reg;
+    reg_ = (reg_ & ~kRegTypeMask) | (reg & kRegTypeMask);
   }
 
-  // Retrieve the least significant register of a pair.
+  // Retrieve the least significant register of a pair and return it as a 32-bit solo.
   int GetLowReg() const {
     DCHECK(IsPair());
-    return (reg_ & kRegValMask);
+    return ((reg_ & kRegTypeMask) | k32BitSolo);
   }
 
   // Create a stand-alone RegStorage from the low reg of a pair.
   RegStorage GetLow() const {
     DCHECK(IsPair());
-    return RegStorage(k32BitSolo, reg_ & kRegValMask);
+    return RegStorage(k32BitSolo, reg_ & kRegTypeMask);
   }
 
   // Retrieve the most significant register of a pair.
   int GetHighReg() const {
     DCHECK(IsPair());
-    return (reg_ & kHighRegMask) >> kHighRegShift;
+    return k32BitSolo | ((reg_ & kHighRegMask) >> kHighRegShift) | (reg_ & kFloatingPoint);
   }
 
   // Create a stand-alone RegStorage from the high reg of a pair.
   RegStorage GetHigh() const {
     DCHECK(IsPair());
-    return RegStorage(k32BitSolo, (reg_ & kHighRegMask) >> kHighRegShift);
+    return RegStorage(kValid | GetHighReg());
   }
 
   void SetHighReg(int reg) {
     DCHECK(IsPair());
-    reg_ = (reg_ & ~kHighRegMask) | (reg << kHighRegShift);
-    DCHECK_EQ(GetHighReg(), reg);
+    reg_ = (reg_ & ~kHighRegMask) | ((reg & kHighRegNumMask) << kHighRegShift);
+  }
+
+  // Return the register number of low or solo.
+  int GetRegNum() const {
+    return reg_ & kRegNumMask;
+  }
+
+  // Aliased double to low single.
+  RegStorage DoubleToLowSingle() const {
+    DCHECK(IsDouble());
+    return FloatSolo32(GetRegNum() << 1);
+  }
+
+  // Aliased double to high single.
+  RegStorage DoubleToHighSingle() const {
+    DCHECK(IsDouble());
+    return FloatSolo32((GetRegNum() << 1) + 1);
+  }
+
+  // Single to aliased double.
+  RegStorage SingleToDouble() const {
+    DCHECK(IsSingle());
+    return FloatSolo64(GetRegNum() >> 1);
+  }
+
+  // Is register number in 0..7?
+  bool Low8() const {
+    return GetRegNum() < 8;
+  }
+
+  // Is register number in 0..3?
+  bool Low4() const {
+    return GetRegNum() < 4;
   }
 
   // Combine 2 32-bit solo regs into a pair.
@@ -180,24 +262,61 @@
     return RegStorage(k64BitPair, low.GetReg(), high.GetReg());
   }
 
+  static bool SameRegType(RegStorage reg1, RegStorage reg2) {
+    return (reg1.IsDouble() == reg2.IsDouble()) && (reg1.IsSingle() == reg2.IsSingle());
+  }
+
+  static bool SameRegType(int reg1, int reg2) {
+    return (IsDouble(reg1) == IsDouble(reg2)) && (IsSingle(reg1) == IsSingle(reg2));
+  }
+
   // Create a 32-bit solo.
   static RegStorage Solo32(int reg_num) {
-    return RegStorage(k32BitSolo, reg_num);
+    return RegStorage(k32BitSolo, reg_num & kRegTypeMask);
+  }
+
+  // Create a floating point 32-bit solo.
+  static RegStorage FloatSolo32(int reg_num) {
+    return RegStorage(k32BitSolo, (reg_num & kRegNumMask) | kFloatingPoint);
   }
 
   // Create a 64-bit solo.
   static RegStorage Solo64(int reg_num) {
-    return RegStorage(k64BitSolo, reg_num);
+    return RegStorage(k64BitSolo, reg_num & kRegTypeMask);
+  }
+
+  // Create a floating point 64-bit solo.
+  static RegStorage FloatSolo64(int reg_num) {
+    return RegStorage(k64BitSolo, (reg_num & kRegNumMask) | kFloatingPoint);
   }
 
   static RegStorage InvalidReg() {
     return RegStorage(kInvalid);
   }
 
+  static uint16_t RegNum(int raw_reg_bits) {
+    return raw_reg_bits & kRegNumMask;
+  }
+
   int GetRawBits() const {
     return reg_;
   }
 
+  size_t StorageSize() {
+    switch (reg_ & kShapeMask) {
+      case kInvalid: return 0;
+      case k32BitSolo: return 4;
+      case k64BitSolo: return 8;
+      case k64BitPair: return 8;  // Is this useful?  Might want to disallow taking size of pair.
+      case k128BitSolo: return 16;
+      case k256BitSolo: return 32;
+      case k512BitSolo: return 64;
+      case k1024BitSolo: return 128;
+      default: LOG(FATAL) << "Unexpected shape";
+    }
+    return 0;
+  }
+
  private:
   uint16_t reg_;
 };
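
Illustration only (not part of the patch): a minimal sketch of how the re-encoded RegStorage is meant to be used, based solely on the constructors and queries visible in the hunk above. The include path and the art namespace are assumptions.

#include "dex/reg_storage.h"  // Assumed location of the header modified above.

namespace art {

void RegStorageSketch() {
  // 32-bit core register number 5: valid bit + k32BitSolo shape + reg num.
  RegStorage core5 = RegStorage::Solo32(5);
  // core5.Is32Bit(), !core5.IsFloat(), core5.GetRegNum() == 5, core5.StorageSize() == 4.

  // 64-bit floating point register d3: the kFloatingPoint bit rides along with the num.
  RegStorage d3 = RegStorage::FloatSolo64(3);
  // d3.IsDouble() holds; on an ARM-style target d3.DoubleToLowSingle() names s6 and
  // d3.DoubleToHighSingle() names s7, i.e. the two aliased 32-bit views.

  // 64-bit value spread across a pair of 32-bit core registers. The high register
  // number must fit in 0..31 because only five bits ([HHHHH]) are reserved for it.
  RegStorage pair(RegStorage::k64BitPair, 0, 1);
  // pair.IsPair() holds; pair.GetLow() and pair.GetHigh() rebuild the two 32-bit solos.
}

}  // namespace art
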
diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc
index 5f89c21..6f47b8f 100644
--- a/compiler/dex/ssa_transformation.cc
+++ b/compiler/dex/ssa_transformation.cc
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "bit_vector_block_iterator.h"
 #include "compiler_internals.h"
 #include "dataflow_iterator-inl.h"
 
@@ -127,12 +126,7 @@
     return false;
   }
 
-  ArenaBitVector::Iterator iterator(bb->data_flow_info->def_v);
-  while (true) {
-    int idx = iterator.Next();
-    if (idx == -1) {
-      break;
-    }
+  for (uint32_t idx : bb->data_flow_info->def_v->Indexes()) {
     /* Block bb defines register idx */
     def_block_matrix_[idx]->SetBit(bb->id);
   }
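
Illustration only (not part of the patch): what the new range-based iteration over an ArenaBitVector does conceptually. ToyBitVector and its Indexes() adapter are hypothetical stand-ins; the real IndexIterator additionally exposes Done(), which the explicit work-stack loop in the next hunk uses. The point is that "for (uint32_t idx : bv.Indexes())" visits exactly the indexes of the set bits, replacing the old Iterator::Next() == -1 pattern.

#include <cstdint>
#include <vector>

class ToyBitVector {
 public:
  explicit ToyBitVector(uint32_t num_bits) : bits_(num_bits, false) {}
  void SetBit(uint32_t idx) { bits_[idx] = true; }

  class IndexIterator {
   public:
    IndexIterator(const std::vector<bool>* bits, uint32_t pos)
        : bits_(bits), pos_(Skip(pos)) {}
    uint32_t operator*() const { return pos_; }
    IndexIterator& operator++() { pos_ = Skip(pos_ + 1); return *this; }
    bool operator!=(const IndexIterator& other) const { return pos_ != other.pos_; }
    bool Done() const { return pos_ >= bits_->size(); }  // Mirrors the ART helper.
   private:
    uint32_t Skip(uint32_t pos) const {  // Advance to the next set bit (or the end).
      while (pos < bits_->size() && !(*bits_)[pos]) { ++pos; }
      return pos;
    }
    const std::vector<bool>* bits_;
    uint32_t pos_;
  };

  struct IndexContainer {
    const std::vector<bool>* bits;
    IndexIterator begin() const { return IndexIterator(bits, 0); }
    IndexIterator end() const {
      return IndexIterator(bits, static_cast<uint32_t>(bits->size()));
    }
  };

  IndexContainer Indexes() const { return IndexContainer{&bits_}; }

 private:
  std::vector<bool> bits_;
};

// Usage mirroring the loop above:
//   ToyBitVector def_v(32);
//   def_v.SetBit(3);  def_v.SetBit(17);
//   for (uint32_t idx : def_v.Indexes()) { /* visits 3, then 17 */ }
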
@@ -182,22 +176,22 @@
     dom_post_order_traversal_->Reset();
   }
   ClearAllVisitedFlags();
-  std::vector<std::pair<BasicBlock*, ArenaBitVector::Iterator*> > work_stack;
+  std::vector<std::pair<BasicBlock*, ArenaBitVector::IndexIterator>> work_stack;
   bb->visited = true;
-  work_stack.push_back(std::make_pair(bb, bb->i_dominated->GetIterator()));
+  work_stack.push_back(std::make_pair(bb, bb->i_dominated->Indexes().begin()));
   while (!work_stack.empty()) {
-    const std::pair<BasicBlock*, ArenaBitVector::Iterator*>& curr = work_stack.back();
-    BasicBlock* curr_bb = curr.first;
-    ArenaBitVector::Iterator* curr_idom_iter = curr.second;
-    int bb_idx = curr_idom_iter->Next();
-    while ((bb_idx != -1) && (NeedsVisit(GetBasicBlock(bb_idx)) == NULL)) {
-      bb_idx = curr_idom_iter->Next();
+    std::pair<BasicBlock*, ArenaBitVector::IndexIterator>* curr = &work_stack.back();
+    BasicBlock* curr_bb = curr->first;
+    ArenaBitVector::IndexIterator* curr_idom_iter = &curr->second;
+    while (!curr_idom_iter->Done() && (NeedsVisit(GetBasicBlock(**curr_idom_iter)) == nullptr)) {
+      ++*curr_idom_iter;
     }
-    if (bb_idx != -1) {
-      BasicBlock* new_bb = GetBasicBlock(bb_idx);
+    // NOTE: work_stack.push_back()/pop_back() invalidate curr and curr_idom_iter.
+    if (!curr_idom_iter->Done()) {
+      BasicBlock* new_bb = GetBasicBlock(**curr_idom_iter);
+      ++*curr_idom_iter;
       new_bb->visited = true;
-      work_stack.push_back(
-          std::make_pair(new_bb, new_bb->i_dominated->GetIterator()));
+      work_stack.push_back(std::make_pair(new_bb, new_bb->i_dominated->Indexes().begin()));
     } else {
       // no successor/next
       if (curr_bb->id != NullBasicBlockId) {
@@ -249,11 +243,10 @@
   }
 
   /* Calculate DF_up */
-  BitVectorBlockIterator it(bb->i_dominated, cu_);
-  for (BasicBlock *dominated_bb = it.Next(); dominated_bb != nullptr; dominated_bb = it.Next()) {
-    BitVectorBlockIterator inner_it(dominated_bb->dom_frontier, cu_);
-    for (BasicBlock *df_up_block = inner_it.Next(); df_up_block != nullptr;
-         df_up_block = inner_it.Next()) {
+  for (uint32_t dominated_idx : bb->i_dominated->Indexes()) {
+    BasicBlock *dominated_bb = GetBasicBlock(dominated_idx);
+    for (uint32_t df_up_block_idx : dominated_bb->dom_frontier->Indexes()) {
+      BasicBlock *df_up_block = GetBasicBlock(df_up_block_idx);
       CheckForDominanceFrontier(bb, df_up_block);
     }
   }
@@ -449,7 +442,8 @@
  * insert a phi node if the variable is live-in to the block.
  */
 bool MIRGraph::ComputeBlockLiveIns(BasicBlock* bb) {
-  ArenaBitVector* temp_dalvik_register_v = temp_dalvik_register_v_;
+  DCHECK_EQ(temp_bit_vector_size_, cu_->num_dalvik_registers);
+  ArenaBitVector* temp_dalvik_register_v = temp_bit_vector_;
 
   if (bb->data_flow_info == NULL) {
     return false;
@@ -487,15 +481,10 @@
 /* Insert phi nodes for each variable into the dominance frontiers */
 void MIRGraph::InsertPhiNodes() {
   int dalvik_reg;
-  ArenaBitVector* phi_blocks =
-      new (arena_) ArenaBitVector(arena_, GetNumBlocks(), false, kBitMapPhi);
-  ArenaBitVector* tmp_blocks =
-      new (arena_) ArenaBitVector(arena_, GetNumBlocks(), false, kBitMapTmpBlocks);
-  ArenaBitVector* input_blocks =
-      new (arena_) ArenaBitVector(arena_, GetNumBlocks(), false, kBitMapInputBlocks);
-
-  temp_dalvik_register_v_ =
-      new (arena_) ArenaBitVector(arena_, cu_->num_dalvik_registers, false, kBitMapRegisterV);
+  ArenaBitVector* phi_blocks = new (temp_scoped_alloc_.get()) ArenaBitVector(
+      temp_scoped_alloc_.get(), GetNumBlocks(), false, kBitMapPhi);
+  ArenaBitVector* input_blocks = new (temp_scoped_alloc_.get()) ArenaBitVector(
+      temp_scoped_alloc_.get(), GetNumBlocks(), false, kBitMapInputBlocks);
 
   RepeatingPostOrderDfsIterator iter(this);
   bool change = false;
@@ -505,60 +494,29 @@
 
   /* Iterate through each Dalvik register */
   for (dalvik_reg = cu_->num_dalvik_registers - 1; dalvik_reg >= 0; dalvik_reg--) {
-    bool change;
-
     input_blocks->Copy(def_block_matrix_[dalvik_reg]);
     phi_blocks->ClearAllBits();
-
-    /* Calculate the phi blocks for each Dalvik register */
     do {
-      change = false;
-      tmp_blocks->ClearAllBits();
-      ArenaBitVector::Iterator iterator(input_blocks);
-
-      while (true) {
-        int idx = iterator.Next();
-        if (idx == -1) {
-          break;
-        }
+      // TUNING: When we repeat this, we could skip indexes from the previous pass.
+      for (uint32_t idx : input_blocks->Indexes()) {
         BasicBlock* def_bb = GetBasicBlock(idx);
-
-        /* Merge the dominance frontier to tmp_blocks */
-        // TUNING: hot call to Union().
-        if (def_bb->dom_frontier != NULL) {
-          tmp_blocks->Union(def_bb->dom_frontier);
+        if (def_bb->dom_frontier != nullptr) {
+          phi_blocks->Union(def_bb->dom_frontier);
         }
       }
-      if (!phi_blocks->Equal(tmp_blocks)) {
-        change = true;
-        phi_blocks->Copy(tmp_blocks);
-
-        /*
-         * Iterate through the original blocks plus the new ones in
-         * the dominance frontier.
-         */
-        input_blocks->Copy(phi_blocks);
-        input_blocks->Union(def_block_matrix_[dalvik_reg]);
-      }
-    } while (change);
+    } while (input_blocks->Union(phi_blocks));
 
     /*
      * Insert a phi node for dalvik_reg in the phi_blocks if the Dalvik
      * register is in the live-in set.
      */
-    ArenaBitVector::Iterator iterator(phi_blocks);
-    while (true) {
-      int idx = iterator.Next();
-      if (idx == -1) {
-        break;
-      }
+    for (uint32_t idx : phi_blocks->Indexes()) {
       BasicBlock* phi_bb = GetBasicBlock(idx);
       /* Variable will be clobbered before being used - no need for phi */
       if (!phi_bb->data_flow_info->live_in_v->IsBitSet(dalvik_reg)) {
         continue;
       }
-      MIR *phi =
-          static_cast<MIR*>(arena_->Alloc(sizeof(MIR), kArenaAllocDFInfo));
+      MIR *phi = NewMIR();
       phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpPhi);
       phi->dalvikInsn.vA = dalvik_reg;
       phi->offset = phi_bb->start_offset;
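
Illustration only (not part of the patch): the fixpoint that the rewritten InsertPhiNodes() loop computes, restated with std::set<uint32_t> instead of ArenaBitVector so the iterated dominance frontier is easier to see. DomFrontier is a hypothetical stand-in for looking up def_bb->dom_frontier; the real loop's termination relies on ArenaBitVector::Union() reporting whether it added any new bits, which appears here as the explicit changed flag.

#include <cstddef>
#include <cstdint>
#include <functional>
#include <set>

std::set<uint32_t> IteratedDominanceFrontier(
    const std::set<uint32_t>& def_blocks,
    const std::function<const std::set<uint32_t>&(uint32_t)>& DomFrontier) {
  std::set<uint32_t> phi_blocks;                 // Blocks that need a phi for this vreg.
  std::set<uint32_t> input_blocks = def_blocks;  // Start from the blocks defining it.
  bool changed;
  do {
    // Union the dominance frontier of every current input block into phi_blocks.
    for (uint32_t idx : input_blocks) {
      const std::set<uint32_t>& df = DomFrontier(idx);
      phi_blocks.insert(df.begin(), df.end());
    }
    // Feed the newly discovered phi blocks back in; stop once nothing new appears.
    size_t before = input_blocks.size();
    input_blocks.insert(phi_blocks.begin(), phi_blocks.end());
    changed = input_blocks.size() != before;
  } while (changed);
  return phi_blocks;
}
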
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index 0f812a4..01c8f80 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -17,6 +17,7 @@
 #include "verified_method.h"
 
 #include <algorithm>
+#include <memory>
 #include <vector>
 
 #include "base/logging.h"
@@ -34,7 +35,6 @@
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object.h"
 #include "mirror/object-inl.h"
-#include "UniquePtr.h"
 #include "verifier/dex_gc_map.h"
 #include "verifier/method_verifier.h"
 #include "verifier/method_verifier-inl.h"
@@ -45,7 +45,7 @@
 
 const VerifiedMethod* VerifiedMethod::Create(verifier::MethodVerifier* method_verifier,
                                              bool compile) {
-  UniquePtr<VerifiedMethod> verified_method(new VerifiedMethod);
+  std::unique_ptr<VerifiedMethod> verified_method(new VerifiedMethod);
   if (compile) {
     /* Generate a register map. */
     if (!verified_method->GenerateGcMap(method_verifier)) {
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index 4be0f59..95b3d86 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -124,7 +124,7 @@
 bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) {
   SSARepresentation *ssa_rep = mir->ssa_rep;
   if (ssa_rep) {
-    uint64_t attrs = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+    uint64_t attrs = GetDataFlowAttributes(mir);
     const int* uses = ssa_rep->uses;
     const int* defs = ssa_rep->defs;
 
@@ -403,7 +403,7 @@
 }
 
 // FIXME - will likely need to revisit all uses of this.
-static const RegLocation fresh_loc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0, kVectorNotUsed,
+static const RegLocation fresh_loc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0,
                                       RegStorage(), INVALID_SREG, INVALID_SREG};
 
 void MIRGraph::InitRegLocations() {
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index d9f2a3a..45abfcc 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -28,7 +28,7 @@
 #include "mirror/dex_cache-inl.h"
 #include "mirror/art_field-inl.h"
 #include "scoped_thread_state_change.h"
-#include "sirt_ref-inl.h"
+#include "handle_scope-inl.h"
 
 namespace art {
 
@@ -42,10 +42,10 @@
 }
 
 inline mirror::Class* CompilerDriver::ResolveCompilingMethodsClass(
-    ScopedObjectAccess& soa, const SirtRef<mirror::DexCache>& dex_cache,
-    const SirtRef<mirror::ClassLoader>& class_loader, const DexCompilationUnit* mUnit) {
-  DCHECK(dex_cache->GetDexFile() == mUnit->GetDexFile());
-  DCHECK(class_loader.get() == soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
+    ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit) {
+  DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile());
+  DCHECK_EQ(class_loader.Get(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
   const DexFile::MethodId& referrer_method_id =
       mUnit->GetDexFile()->GetMethodId(mUnit->GetDexMethodIndex());
   mirror::Class* referrer_class = mUnit->GetClassLinker()->ResolveType(
@@ -59,11 +59,11 @@
 }
 
 inline mirror::ArtField* CompilerDriver::ResolveField(
-    ScopedObjectAccess& soa, const SirtRef<mirror::DexCache>& dex_cache,
-    const SirtRef<mirror::ClassLoader>& class_loader, const DexCompilationUnit* mUnit,
+    ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
     uint32_t field_idx, bool is_static) {
-  DCHECK(dex_cache->GetDexFile() == mUnit->GetDexFile());
-  DCHECK(class_loader.get() == soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
+  DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile());
+  DCHECK_EQ(class_loader.Get(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
   mirror::ArtField* resolved_field = mUnit->GetClassLinker()->ResolveField(
       *mUnit->GetDexFile(), field_idx, dex_cache, class_loader, is_static);
   DCHECK_EQ(resolved_field == nullptr, soa.Self()->IsExceptionPending());
@@ -165,13 +165,14 @@
 }
 
 inline mirror::ArtMethod* CompilerDriver::ResolveMethod(
-    ScopedObjectAccess& soa, const SirtRef<mirror::DexCache>& dex_cache,
-    const SirtRef<mirror::ClassLoader>& class_loader, const DexCompilationUnit* mUnit,
+    ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
     uint32_t method_idx, InvokeType invoke_type) {
-  DCHECK(dex_cache->GetDexFile() == mUnit->GetDexFile());
-  DCHECK(class_loader.get() == soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
+  DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile());
+  DCHECK_EQ(class_loader.Get(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
   mirror::ArtMethod* resolved_method = mUnit->GetClassLinker()->ResolveMethod(
-      *mUnit->GetDexFile(), method_idx, dex_cache, class_loader, nullptr, invoke_type);
+      *mUnit->GetDexFile(), method_idx, dex_cache, class_loader, NullHandle<mirror::ArtMethod>(),
+      invoke_type);
   DCHECK_EQ(resolved_method == nullptr, soa.Self()->IsExceptionPending());
   if (UNLIKELY(resolved_method == nullptr)) {
     // Clean up any exception left by type resolution.
@@ -206,8 +207,8 @@
 }
 
 inline int CompilerDriver::IsFastInvoke(
-    ScopedObjectAccess& soa, const SirtRef<mirror::DexCache>& dex_cache,
-    const SirtRef<mirror::ClassLoader>& class_loader, const DexCompilationUnit* mUnit,
+    ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
     mirror::Class* referrer_class, mirror::ArtMethod* resolved_method, InvokeType* invoke_type,
     MethodReference* target_method, const MethodReference* devirt_target,
     uintptr_t* direct_code, uintptr_t* direct_method) {
@@ -217,7 +218,7 @@
   }
   mirror::Class* methods_class = resolved_method->GetDeclaringClass();
   if (UNLIKELY(!referrer_class->CanAccessResolvedMethod(methods_class, resolved_method,
-                                                        dex_cache.get(),
+                                                        dex_cache.Get(),
                                                         target_method->dex_method_index))) {
     return 0;
   }
@@ -237,7 +238,7 @@
     // Sharpen a virtual call into a direct call. The method_idx is into referrer's
     // dex cache, check that this resolved method is where we expect it.
     CHECK(target_method->dex_file == mUnit->GetDexFile());
-    DCHECK(dex_cache.get() == mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile()));
+    DCHECK(dex_cache.Get() == mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile()));
     CHECK(referrer_class->GetDexCache()->GetResolvedMethod(target_method->dex_method_index) ==
         resolved_method) << PrettyMethod(resolved_method);
     int stats_flags = kFlagMethodResolved;
@@ -256,14 +257,17 @@
     ClassLinker* class_linker = mUnit->GetClassLinker();
     if (LIKELY(devirt_target->dex_file == mUnit->GetDexFile())) {
       called_method = class_linker->ResolveMethod(*devirt_target->dex_file,
-                                                  devirt_target->dex_method_index,
-                                                  dex_cache, class_loader, NULL, kVirtual);
+                                                  devirt_target->dex_method_index, dex_cache,
+                                                  class_loader, NullHandle<mirror::ArtMethod>(),
+                                                  kVirtual);
     } else {
-      SirtRef<mirror::DexCache> target_dex_cache(soa.Self(),
-          class_linker->FindDexCache(*devirt_target->dex_file));
+      StackHandleScope<1> hs(soa.Self());
+      Handle<mirror::DexCache> target_dex_cache(
+          hs.NewHandle(class_linker->FindDexCache(*devirt_target->dex_file)));
       called_method = class_linker->ResolveMethod(*devirt_target->dex_file,
                                                   devirt_target->dex_method_index,
-                                                  target_dex_cache, class_loader, NULL, kVirtual);
+                                                  target_dex_cache, class_loader,
+                                                  NullHandle<mirror::ArtMethod>(), kVirtual);
     }
     CHECK(called_method != NULL);
     CHECK(!called_method->IsAbstract());
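
Illustration only (not part of the patch): a toy model of the SirtRef -> StackHandleScope/Handle migration that this change applies throughout the compiler driver. ToyObject, ToyHandle and ToyHandleScope are hypothetical stand-ins; the real StackHandleScope<N> also registers its slots with the thread so the GC can see and update the references, which this sketch does not model.

#include <array>
#include <cstddef>

struct ToyObject {};

template <typename T>
class ToyHandle {
 public:
  explicit ToyHandle(T** slot) : slot_(slot) {}
  T* Get() const { return *slot_; }      // Mirrors Handle<T>::Get().
  void Assign(T* obj) { *slot_ = obj; }  // Mirrors Handle<T>::Assign().
 private:
  T** slot_;
};

// Homogeneous and unchecked for brevity; the real scope holds mixed mirror:: types.
template <typename T, size_t kNumSlots>
class ToyHandleScope {
 public:
  ToyHandle<T> NewHandle(T* obj) {  // Mirrors hs.NewHandle(obj).
    slots_[next_] = obj;
    return ToyHandle<T>(&slots_[next_++]);
  }
 private:
  std::array<T*, kNumSlots> slots_{};
  size_t next_ = 0;
};

void HandlePatternSketch(ToyObject* dex_cache_obj, ToyObject* class_loader_obj) {
  // Same shape as the real code above:
  //   StackHandleScope<2> hs(soa.Self());
  //   Handle<mirror::DexCache> dex_cache(hs.NewHandle(...));
  //   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(...));
  ToyHandleScope<ToyObject, 2> hs;
  ToyHandle<ToyObject> dex_cache = hs.NewHandle(dex_cache_obj);
  ToyHandle<ToyObject> class_loader = hs.NewHandle(class_loader_obj);
  // Callees now take Handle<T> by value and read through Get() instead of SirtRef::get().
  (void)dex_cache.Get();
  (void)class_loader.Get();
}
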
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 0ad30be..3304561 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -49,7 +49,7 @@
 #include "mirror/throwable.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
-#include "sirt_ref-inl.h"
+#include "handle_scope-inl.h"
 #include "thread.h"
 #include "thread_pool.h"
 #include "trampolines/trampoline_compiler.h"
@@ -336,7 +336,7 @@
     : profile_ok_(false), compiler_options_(compiler_options),
       verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
-      compiler_(Compiler::Create(compiler_kind)),
+      compiler_(Compiler::Create(this, compiler_kind)),
       instruction_set_(instruction_set),
       instruction_set_features_(instruction_set_features),
       freezing_constructor_lock_("freezing constructor lock"),
@@ -374,11 +374,13 @@
 
   dex_to_dex_compiler_ = reinterpret_cast<DexToDexCompilerFn>(ArtCompileDEX);
 
-  compiler_->Init(*this);
+  compiler_->Init();
 
   CHECK(!Runtime::Current()->IsStarted());
-  if (!image_) {
-    CHECK(image_classes_.get() == NULL);
+  if (image_) {
+    CHECK(image_classes_.get() != nullptr);
+  } else {
+    CHECK(image_classes_.get() == nullptr);
   }
 
   // Are we generating CFI information?
@@ -433,7 +435,7 @@
     STLDeleteElements(&classes_to_patch_);
   }
   CHECK_PTHREAD_CALL(pthread_key_delete, (tls_key_), "delete tls key");
-  compiler_->UnInit(*this);
+  compiler_->UnInit();
 }
 
 CompilerTls* CompilerDriver::GetTls() {
@@ -500,7 +502,7 @@
                                 const std::vector<const DexFile*>& dex_files,
                                 TimingLogger* timings) {
   DCHECK(!Runtime::Current()->IsStarted());
-  UniquePtr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
+  std::unique_ptr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
   PreCompile(class_loader, dex_files, thread_pool.get(), timings);
   Compile(class_loader, dex_files, thread_pool.get(), timings);
   if (dump_stats_) {
@@ -509,7 +511,7 @@
 }
 
 static DexToDexCompilationLevel GetDexToDexCompilationlevel(
-    Thread* self, SirtRef<mirror::ClassLoader>& class_loader, const DexFile& dex_file,
+    Thread* self, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file,
     const DexFile::ClassDef& class_def) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   const char* descriptor = dex_file.GetClassDescriptor(class_def);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -524,7 +526,7 @@
   // function). Since image classes can be verified again while compiling an application,
   // we must prevent the DEX-to-DEX compiler from introducing them.
   // TODO: find a way to enable "quick" instructions for image classes and remove this check.
-  bool compiling_image_classes = class_loader.get() == nullptr;
+  bool compiling_image_classes = class_loader.Get() == nullptr;
   if (compiling_image_classes) {
     return kRequired;
   } else if (klass->IsVerified()) {
@@ -566,7 +568,7 @@
   std::vector<const DexFile*> dex_files;
   dex_files.push_back(dex_file);
 
-  UniquePtr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", 0U));
+  std::unique_ptr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", 0U));
   PreCompile(jclass_loader, dex_files, thread_pool.get(), timings);
 
   // Can we run DEX-to-DEX compiler on this class ?
@@ -574,8 +576,9 @@
   {
     ScopedObjectAccess soa(Thread::Current());
     const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_idx);
-    SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
-                                              soa.Decode<mirror::ClassLoader*>(jclass_loader));
+    StackHandleScope<1> hs(soa.Self());
+    Handle<mirror::ClassLoader> class_loader(
+        hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
     dex_to_dex_compilation_level = GetDexToDexCompilationlevel(self, class_loader, *dex_file,
                                                                class_def);
   }
@@ -591,7 +594,7 @@
                              ThreadPool* thread_pool, TimingLogger* timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
-    CHECK(dex_file != NULL);
+    CHECK(dex_file != nullptr);
     ResolveDexFile(class_loader, *dex_file, thread_pool, timings);
   }
 }
@@ -623,7 +626,7 @@
 }
 
 static void ResolveExceptionsForMethod(MethodHelper* mh,
-    std::set<std::pair<uint16_t, const DexFile*> >& exceptions_to_resolve)
+    std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   const DexFile::CodeItem* code_item = mh->GetCodeItem();
   if (code_item == NULL) {
@@ -662,8 +665,8 @@
 
 static bool ResolveCatchBlockExceptionsClassVisitor(mirror::Class* c, void* arg)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  std::set<std::pair<uint16_t, const DexFile*> >* exceptions_to_resolve =
-      reinterpret_cast<std::set<std::pair<uint16_t, const DexFile*> >*>(arg);
+  std::set<std::pair<uint16_t, const DexFile*>>* exceptions_to_resolve =
+      reinterpret_cast<std::set<std::pair<uint16_t, const DexFile*>>*>(arg);
   MethodHelper mh;
   for (size_t i = 0; i < c->NumVirtualMethods(); ++i) {
     mirror::ArtMethod* m = c->GetVirtualMethod(i);
@@ -682,13 +685,14 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   CompilerDriver::DescriptorSet* image_classes =
       reinterpret_cast<CompilerDriver::DescriptorSet*>(arg);
-  image_classes->insert(ClassHelper(klass).GetDescriptor());
+  image_classes->insert(klass->GetDescriptor());
   return true;
 }
 
 // Make a list of descriptors for classes to include in the image
 void CompilerDriver::LoadImageClasses(TimingLogger* timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_) {
+  CHECK(timings != nullptr);
   if (!IsImage()) {
     return;
   }
@@ -698,10 +702,13 @@
   Thread* self = Thread::Current();
   ScopedObjectAccess soa(self);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  CHECK(image_classes_.get() != nullptr);
   for (auto it = image_classes_->begin(), end = image_classes_->end(); it != end;) {
     const std::string& descriptor(*it);
-    SirtRef<mirror::Class> klass(self, class_linker->FindSystemClass(self, descriptor.c_str()));
-    if (klass.get() == NULL) {
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> klass(
+        hs.NewHandle(class_linker->FindSystemClass(self, descriptor.c_str())));
+    if (klass.Get() == NULL) {
       VLOG(compiler) << "Failed to find class " << descriptor;
       image_classes_->erase(it++);
       self->ClearException();
@@ -713,9 +720,10 @@
   // Resolve exception classes referenced by the loaded classes. The catch logic assumes
   // exceptions are resolved by the verifier when there is a catch block in an interested method.
   // Do this here so that exception classes appear to have been specified image classes.
-  std::set<std::pair<uint16_t, const DexFile*> > unresolved_exception_types;
-  SirtRef<mirror::Class> java_lang_Throwable(self,
-                                     class_linker->FindSystemClass(self, "Ljava/lang/Throwable;"));
+  std::set<std::pair<uint16_t, const DexFile*>> unresolved_exception_types;
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Class> java_lang_Throwable(
+      hs.NewHandle(class_linker->FindSystemClass(self, "Ljava/lang/Throwable;")));
   do {
     unresolved_exception_types.clear();
     class_linker->VisitClasses(ResolveCatchBlockExceptionsClassVisitor,
@@ -723,16 +731,17 @@
     for (const std::pair<uint16_t, const DexFile*>& exception_type : unresolved_exception_types) {
       uint16_t exception_type_idx = exception_type.first;
       const DexFile* dex_file = exception_type.second;
-      SirtRef<mirror::DexCache> dex_cache(self, class_linker->FindDexCache(*dex_file));
-      SirtRef<mirror::ClassLoader> class_loader(self, nullptr);
-      SirtRef<mirror::Class> klass(self, class_linker->ResolveType(*dex_file, exception_type_idx,
-                                                                   dex_cache, class_loader));
-      if (klass.get() == NULL) {
+      StackHandleScope<2> hs(self);
+      Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(*dex_file)));
+      Handle<mirror::Class> klass(hs.NewHandle(
+          class_linker->ResolveType(*dex_file, exception_type_idx, dex_cache,
+                                    NullHandle<mirror::ClassLoader>())));
+      if (klass.Get() == NULL) {
         const DexFile::TypeId& type_id = dex_file->GetTypeId(exception_type_idx);
         const char* descriptor = dex_file->GetTypeDescriptor(type_id);
         LOG(FATAL) << "Failed to resolve class " << descriptor;
       }
-      DCHECK(java_lang_Throwable->IsAssignableFrom(klass.get()));
+      DCHECK(java_lang_Throwable->IsAssignableFrom(klass.Get()));
     }
     // Resolving exceptions may load classes that reference more exceptions, iterate until no
     // more are found
@@ -746,11 +755,15 @@
   CHECK_NE(image_classes_->size(), 0U);
 }
 
-static void MaybeAddToImageClasses(mirror::Class* klass, CompilerDriver::DescriptorSet* image_classes)
+static void MaybeAddToImageClasses(Handle<mirror::Class> c,
+                                   CompilerDriver::DescriptorSet* image_classes)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  Thread* self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  // Make a copy of the handle so that we don't clobber it doing Assign.
+  Handle<mirror::Class> klass(hs.NewHandle(c.Get()));
   while (!klass->IsObjectClass()) {
-    ClassHelper kh(klass);
-    const char* descriptor = kh.GetDescriptor();
+    std::string descriptor(klass->GetDescriptor());
     std::pair<CompilerDriver::DescriptorSet::iterator, bool> result =
         image_classes->insert(descriptor);
     if (result.second) {
@@ -758,13 +771,16 @@
     } else {
       return;
     }
-    for (size_t i = 0; i < kh.NumDirectInterfaces(); ++i) {
-      MaybeAddToImageClasses(kh.GetDirectInterface(i), image_classes);
+    for (size_t i = 0; i < klass->NumDirectInterfaces(); ++i) {
+      StackHandleScope<1> hs(self);
+      MaybeAddToImageClasses(hs.NewHandle(mirror::Class::GetDirectInterface(self, klass, i)),
+                             image_classes);
     }
     if (klass->IsArrayClass()) {
-      MaybeAddToImageClasses(klass->GetComponentType(), image_classes);
+      StackHandleScope<1> hs(self);
+      MaybeAddToImageClasses(hs.NewHandle(klass->GetComponentType()), image_classes);
     }
-    klass = klass->GetSuperClass();
+    klass.Assign(klass->GetSuperClass());
   }
 }
 
@@ -772,7 +788,8 @@
   DCHECK(object != NULL);
   DCHECK(arg != NULL);
   CompilerDriver* compiler_driver = reinterpret_cast<CompilerDriver*>(arg);
-  MaybeAddToImageClasses(object->GetClass(), compiler_driver->image_classes_.get());
+  StackHandleScope<1> hs(Thread::Current());
+  MaybeAddToImageClasses(hs.NewHandle(object->GetClass()), compiler_driver->image_classes_.get());
 }
 
 void CompilerDriver::UpdateImageClasses(TimingLogger* timings) {
@@ -816,7 +833,9 @@
   if (IsImage()) {
     // We resolve all const-string strings when building for the image.
     ScopedObjectAccess soa(Thread::Current());
-    SirtRef<mirror::DexCache> dex_cache(soa.Self(), Runtime::Current()->GetClassLinker()->FindDexCache(dex_file));
+    StackHandleScope<1> hs(soa.Self());
+    Handle<mirror::DexCache> dex_cache(
+        hs.NewHandle(Runtime::Current()->GetClassLinker()->FindDexCache(dex_file)));
     Runtime::Current()->GetClassLinker()->ResolveString(dex_file, string_idx, dex_cache);
     result = true;
   }
@@ -905,13 +924,14 @@
 
 bool CompilerDriver::CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx,
                                         bool* is_type_initialized, bool* use_direct_type_ptr,
-                                        uintptr_t* direct_type_ptr) {
+                                        uintptr_t* direct_type_ptr, bool* out_is_finalizable) {
   ScopedObjectAccess soa(Thread::Current());
   mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
     return false;
   }
+  *out_is_finalizable = resolved_class->IsFinalizable();
   const bool compiling_boot = Runtime::Current()->GetHeap()->IsCompilingBoot();
   if (compiling_boot) {
     // boot -> boot class pointers.
@@ -979,16 +999,17 @@
   mirror::Class* referrer_class;
   mirror::DexCache* dex_cache;
   {
-    SirtRef<mirror::DexCache> dex_cache_sirt(soa.Self(),
-        mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile()));
-    SirtRef<mirror::ClassLoader> class_loader_sirt(soa.Self(),
-        soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
-    SirtRef<mirror::ArtField> resolved_field_sirt(soa.Self(),
-        ResolveField(soa, dex_cache_sirt, class_loader_sirt, mUnit, field_idx, false));
-    referrer_class = (resolved_field_sirt.get() != nullptr)
-        ? ResolveCompilingMethodsClass(soa, dex_cache_sirt, class_loader_sirt, mUnit) : nullptr;
-    resolved_field = resolved_field_sirt.get();
-    dex_cache = dex_cache_sirt.get();
+    StackHandleScope<3> hs(soa.Self());
+    Handle<mirror::DexCache> dex_cache_handle(
+        hs.NewHandle(mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile())));
+    Handle<mirror::ClassLoader> class_loader_handle(
+        hs.NewHandle(soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader())));
+    Handle<mirror::ArtField> resolved_field_handle(hs.NewHandle(
+        ResolveField(soa, dex_cache_handle, class_loader_handle, mUnit, field_idx, false)));
+    referrer_class = (resolved_field_handle.Get() != nullptr)
+        ? ResolveCompilingMethodsClass(soa, dex_cache_handle, class_loader_handle, mUnit) : nullptr;
+    resolved_field = resolved_field_handle.Get();
+    dex_cache = dex_cache_handle.Get();
   }
   bool result = false;
   if (resolved_field != nullptr && referrer_class != nullptr) {
@@ -1016,16 +1037,17 @@
   mirror::Class* referrer_class;
   mirror::DexCache* dex_cache;
   {
-    SirtRef<mirror::DexCache> dex_cache_sirt(soa.Self(),
-        mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile()));
-    SirtRef<mirror::ClassLoader> class_loader_sirt(soa.Self(),
-        soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
-    SirtRef<mirror::ArtField> resolved_field_sirt(soa.Self(),
-        ResolveField(soa, dex_cache_sirt, class_loader_sirt, mUnit, field_idx, true));
-    referrer_class = (resolved_field_sirt.get() != nullptr)
-        ? ResolveCompilingMethodsClass(soa, dex_cache_sirt, class_loader_sirt, mUnit) : nullptr;
-    resolved_field = resolved_field_sirt.get();
-    dex_cache = dex_cache_sirt.get();
+    StackHandleScope<3> hs(soa.Self());
+    Handle<mirror::DexCache> dex_cache_handle(
+        hs.NewHandle(mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile())));
+    Handle<mirror::ClassLoader> class_loader_handle(
+        hs.NewHandle(soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader())));
+    Handle<mirror::ArtField> resolved_field_handle(hs.NewHandle(
+        ResolveField(soa, dex_cache_handle, class_loader_handle, mUnit, field_idx, true)));
+    referrer_class = (resolved_field_handle.Get() != nullptr)
+        ? ResolveCompilingMethodsClass(soa, dex_cache_handle, class_loader_handle, mUnit) : nullptr;
+    resolved_field = resolved_field_handle.Get();
+    dex_cache = dex_cache_handle.Get();
   }
   bool result = false;
   if (resolved_field != nullptr && referrer_class != nullptr) {
@@ -1130,28 +1152,22 @@
       *type = sharp_type;
     }
   } else {
-    if (compiling_boot) {
+    bool method_in_image = compiling_boot ||
+        Runtime::Current()->GetHeap()->FindSpaceFromObject(method, false)->IsImageSpace();
+    if (method_in_image) {
+      CHECK(!method->IsAbstract());
       *type = sharp_type;
-      *direct_method = -1;
-      *direct_code = -1;
+      *direct_method = compiling_boot ? -1 : reinterpret_cast<uintptr_t>(method);
+      *direct_code = compiling_boot ? -1 : compiler_->GetEntryPointOf(method);
+      target_method->dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
+      target_method->dex_method_index = method->GetDexMethodIndex();
+    } else if (!must_use_direct_pointers) {
+      // Set the code and rely on the dex cache for the method.
+      *type = sharp_type;
+      *direct_code = compiler_->GetEntryPointOf(method);
     } else {
-      bool method_in_image =
-          Runtime::Current()->GetHeap()->FindSpaceFromObject(method, false)->IsImageSpace();
-      if (method_in_image) {
-        CHECK(!method->IsAbstract());
-        *type = sharp_type;
-        *direct_method = reinterpret_cast<uintptr_t>(method);
-        *direct_code = compiler_->GetEntryPointOf(method);
-        target_method->dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
-        target_method->dex_method_index = method->GetDexMethodIndex();
-      } else if (!must_use_direct_pointers) {
-        // Set the code and rely on the dex cache for the method.
-        *type = sharp_type;
-        *direct_code = compiler_->GetEntryPointOf(method);
-      } else {
-        // Direct pointers were required but none were available.
-        VLOG(compiler) << "Dex cache devirtualization failed for: " << PrettyMethod(method);
-      }
+      // Direct pointers were required but none were available.
+      VLOG(compiler) << "Dex cache devirtualization failed for: " << PrettyMethod(method);
     }
   }
 }
@@ -1167,17 +1183,18 @@
   // Try to resolve the method and compiling method's class.
   mirror::ArtMethod* resolved_method;
   mirror::Class* referrer_class;
-  SirtRef<mirror::DexCache> dex_cache(soa.Self(),
-      mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile()));
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
-      soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache(
+      hs.NewHandle(mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile())));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader())));
   {
     uint32_t method_idx = target_method->dex_method_index;
-    SirtRef<mirror::ArtMethod> resolved_method_sirt(soa.Self(),
-        ResolveMethod(soa, dex_cache, class_loader, mUnit, method_idx, orig_invoke_type));
-    referrer_class = (resolved_method_sirt.get() != nullptr)
+    Handle<mirror::ArtMethod> resolved_method_handle(hs.NewHandle(
+        ResolveMethod(soa, dex_cache, class_loader, mUnit, method_idx, orig_invoke_type)));
+    referrer_class = (resolved_method_handle.Get() != nullptr)
         ? ResolveCompilingMethodsClass(soa, dex_cache, class_loader, mUnit) : nullptr;
-    resolved_method = resolved_method_sirt.get();
+    resolved_method = resolved_method_handle.Get();
   }
   bool result = false;
   if (resolved_method != nullptr) {
@@ -1195,7 +1212,7 @@
       // Devirtualization not enabled. Inline IsFastInvoke(), dropping the devirtualization parts.
       if (UNLIKELY(referrer_class == nullptr) ||
           UNLIKELY(!referrer_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(),
-                                                            resolved_method, dex_cache.get(),
+                                                            resolved_method, dex_cache.Get(),
                                                             target_method->dex_method_index)) ||
           *invoke_type == kSuper) {
         // Slow path. (Without devirtualization, all super calls go slow path as well.)
@@ -1346,7 +1363,7 @@
     self->AssertNoPendingException();
     CHECK_GT(work_units, 0U);
 
-    index_ = begin;
+    index_.StoreRelaxed(begin);
     for (size_t i = 0; i < work_units; ++i) {
       thread_pool_->AddTask(self, new ForAllClosure(this, end, callback));
     }
@@ -1361,7 +1378,7 @@
   }
 
   size_t NextIndex() {
-    return index_.FetchAndAdd(1);
+    return index_.FetchAndAddSequentiallyConsistent(1);
   }
 
  private:
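
Illustration only (not part of the patch): the work-distribution scheme behind index_. Each worker claims the next class index with an atomic fetch-add until the shared counter passes the end of the range, which is why the update moves to FetchAndAddSequentiallyConsistent(). Written with std::atomic and std::thread instead of art::Atomic and the ART thread pool, which is an assumption of equivalent semantics.

#include <atomic>
#include <cstddef>
#include <thread>
#include <vector>

void ForAllSketch(size_t begin, size_t end, size_t workers, void (*callback)(size_t)) {
  std::atomic<size_t> index(begin);  // Mirrors index_.StoreRelaxed(begin).
  std::vector<std::thread> pool;
  for (size_t i = 0; i < workers; ++i) {
    pool.emplace_back([&]() {
      while (true) {
        // Mirrors NextIndex(): each call hands out a distinct index exactly once.
        size_t idx = index.fetch_add(1, std::memory_order_seq_cst);
        if (idx >= end) {
          break;
        }
        callback(idx);
      }
    });
  }
  for (std::thread& t : pool) {
    t.join();
  }
}
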
@@ -1468,8 +1485,10 @@
   const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
   if (!SkipClass(class_linker, jclass_loader, dex_file, class_def)) {
     ScopedObjectAccess soa(self);
-    SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(jclass_loader));
-    SirtRef<mirror::DexCache> dex_cache(soa.Self(), class_linker->FindDexCache(dex_file));
+    StackHandleScope<2> hs(soa.Self());
+    Handle<mirror::ClassLoader> class_loader(
+        hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file)));
     // Resolve the class.
     mirror::Class* klass = class_linker->ResolveType(dex_file, class_def.class_idx_, dex_cache,
                                                      class_loader);
@@ -1522,7 +1541,8 @@
       if (resolve_fields_and_methods) {
         while (it.HasNextDirectMethod()) {
           mirror::ArtMethod* method = class_linker->ResolveMethod(dex_file, it.GetMemberIndex(),
-                                                                  dex_cache, class_loader, NULL,
+                                                                  dex_cache, class_loader,
+                                                                  NullHandle<mirror::ArtMethod>(),
                                                                   it.GetMethodInvokeType(class_def));
           if (method == NULL) {
             CHECK(soa.Self()->IsExceptionPending());
@@ -1532,7 +1552,8 @@
         }
         while (it.HasNextVirtualMethod()) {
           mirror::ArtMethod* method = class_linker->ResolveMethod(dex_file, it.GetMemberIndex(),
-                                                                  dex_cache, class_loader, NULL,
+                                                                  dex_cache, class_loader,
+                                                                  NullHandle<mirror::ArtMethod>(),
                                                                   it.GetMethodInvokeType(class_def));
           if (method == NULL) {
             CHECK(soa.Self()->IsExceptionPending());
@@ -1555,17 +1576,17 @@
   ScopedObjectAccess soa(Thread::Current());
   ClassLinker* class_linker = manager->GetClassLinker();
   const DexFile& dex_file = *manager->GetDexFile();
-  SirtRef<mirror::DexCache> dex_cache(soa.Self(), class_linker->FindDexCache(dex_file));
-  SirtRef<mirror::ClassLoader> class_loader(
-      soa.Self(), soa.Decode<mirror::ClassLoader*>(manager->GetClassLoader()));
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file)));
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(manager->GetClassLoader())));
   mirror::Class* klass = class_linker->ResolveType(dex_file, type_idx, dex_cache, class_loader);
 
   if (klass == NULL) {
     CHECK(soa.Self()->IsExceptionPending());
     mirror::Throwable* exception = soa.Self()->GetException(NULL);
     VLOG(compiler) << "Exception during type resolution: " << exception->Dump();
-    if (strcmp("Ljava/lang/OutOfMemoryError;",
-               ClassHelper(exception->GetClass()).GetDescriptor()) == 0) {
+    if (exception->GetClass()->DescriptorEquals("Ljava/lang/OutOfMemoryError;")) {
       // There's little point continuing compilation if the heap is exhausted.
       LOG(FATAL) << "Out of memory during type resolution for compilation";
     }
@@ -1610,11 +1631,12 @@
   const char* descriptor = dex_file.GetClassDescriptor(class_def);
   ClassLinker* class_linker = manager->GetClassLinker();
   jobject jclass_loader = manager->GetClassLoader();
-  SirtRef<mirror::ClassLoader> class_loader(
-      soa.Self(), soa.Decode<mirror::ClassLoader*>(jclass_loader));
-  SirtRef<mirror::Class> klass(soa.Self(), class_linker->FindClass(soa.Self(), descriptor,
-                                                                   class_loader));
-  if (klass.get() == nullptr) {
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
+  Handle<mirror::Class> klass(
+      hs.NewHandle(class_linker->FindClass(soa.Self(), descriptor, class_loader)));
+  if (klass.Get() == nullptr) {
     CHECK(soa.Self()->IsExceptionPending());
     soa.Self()->ClearException();
 
@@ -1623,7 +1645,7 @@
      * This is to ensure the class is structurally sound for compilation. An unsound class
      * will be rejected by the verifier and later skipped during compilation in the compiler.
      */
-    SirtRef<mirror::DexCache> dex_cache(soa.Self(), class_linker->FindDexCache(dex_file));
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file)));
     std::string error_msg;
     if (verifier::MethodVerifier::VerifyClass(&dex_file, dex_cache, class_loader, &class_def, true,
                                               &error_msg) ==
@@ -1631,8 +1653,8 @@
       LOG(ERROR) << "Verification failed on class " << PrettyDescriptor(descriptor)
                  << " because: " << error_msg;
     }
-  } else if (!SkipClass(jclass_loader, dex_file, klass.get())) {
-    CHECK(klass->IsResolved()) << PrettyClass(klass.get());
+  } else if (!SkipClass(jclass_loader, dex_file, klass.Get())) {
+    CHECK(klass->IsResolved()) << PrettyClass(klass.Get());
     class_linker->VerifyClass(klass);
 
     if (klass->IsErroneous()) {
@@ -1642,7 +1664,7 @@
     }
 
     CHECK(klass->IsCompileTimeVerified() || klass->IsErroneous())
-        << PrettyDescriptor(klass.get()) << ": state=" << klass->GetStatus();
+        << PrettyDescriptor(klass.Get()) << ": state=" << klass->GetStatus();
   }
   soa.Self()->AssertNoPendingException();
 }
@@ -1665,13 +1687,13 @@
   const char* descriptor = dex_file.StringDataByIdx(class_type_id.descriptor_idx_);
 
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
-                                            soa.Decode<mirror::ClassLoader*>(jclass_loader));
-  SirtRef<mirror::Class> klass(soa.Self(),
-                               manager->GetClassLinker()->FindClass(soa.Self(), descriptor,
-                                                                    class_loader));
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
+  Handle<mirror::Class> klass(
+      hs.NewHandle(manager->GetClassLinker()->FindClass(soa.Self(), descriptor, class_loader)));
 
-  if (klass.get() != nullptr && !SkipClass(jclass_loader, dex_file, klass.get())) {
+  if (klass.Get() != nullptr && !SkipClass(jclass_loader, dex_file, klass.Get())) {
     // Only try to initialize classes that were successfully verified.
     if (klass->IsVerified()) {
       // Attempt to initialize the class but bail if we either need to initialize the super-class
@@ -1686,8 +1708,8 @@
         // parent-to-child and a child-to-parent lock ordering and consequent potential deadlock.
         // We need to use an ObjectLock due to potential suspension in the interpreting code. Rather
         // than use a special Object for the purpose we use the Class of java.lang.Class.
-        SirtRef<mirror::Class> sirt_klass(soa.Self(), klass->GetClass());
-        ObjectLock<mirror::Class> lock(soa.Self(), &sirt_klass);
+        Handle<mirror::Class> h_klass(hs.NewHandle(klass->GetClass()));
+        ObjectLock<mirror::Class> lock(soa.Self(), h_klass);
         // Attempt to initialize allowing initialization of parent classes but still not static
         // fields.
         manager->GetClassLinker()->EnsureInitialized(klass, false, true);
@@ -1699,44 +1721,34 @@
               !StringPiece(descriptor).ends_with("$NoPreloadHolder;");
           if (can_init_static_fields) {
             VLOG(compiler) << "Initializing: " << descriptor;
-            if (strcmp("Ljava/lang/Void;", descriptor) == 0) {
-              // Hand initialize j.l.Void to avoid Dex file operations in un-started runtime.
-              ObjectLock<mirror::Class> lock(soa.Self(), &klass);
-              mirror::ObjectArray<mirror::ArtField>* fields = klass->GetSFields();
-              CHECK_EQ(fields->GetLength(), 1);
-              fields->Get(0)->SetObj<false>(klass.get(),
-                                                     manager->GetClassLinker()->FindPrimitiveClass('V'));
-              klass->SetStatus(mirror::Class::kStatusInitialized, soa.Self());
-            } else {
-              // TODO multithreading support. We should ensure the current compilation thread has
-              // exclusive access to the runtime and the transaction. To achieve this, we could use
-              // a ReaderWriterMutex but we're holding the mutator lock so we fail mutex sanity
-              // checks in Thread::AssertThreadSuspensionIsAllowable.
-              Runtime* const runtime = Runtime::Current();
-              Transaction transaction;
+            // TODO multithreading support. We should ensure the current compilation thread has
+            // exclusive access to the runtime and the transaction. To achieve this, we could use
+            // a ReaderWriterMutex but we're holding the mutator lock so we fail mutex sanity
+            // checks in Thread::AssertThreadSuspensionIsAllowable.
+            Runtime* const runtime = Runtime::Current();
+            Transaction transaction;
 
-              // Run the class initializer in transaction mode.
-              runtime->EnterTransactionMode(&transaction);
-              const mirror::Class::Status old_status = klass->GetStatus();
-              bool success = manager->GetClassLinker()->EnsureInitialized(klass, true, true);
-              // TODO we detach transaction from runtime to indicate we quit the transactional
-              // mode which prevents the GC from visiting objects modified during the transaction.
-              // Ensure GC is not run so don't access freed objects when aborting transaction.
-              const char* old_casue = soa.Self()->StartAssertNoThreadSuspension("Transaction end");
-              runtime->ExitTransactionMode();
+            // Run the class initializer in transaction mode.
+            runtime->EnterTransactionMode(&transaction);
+            const mirror::Class::Status old_status = klass->GetStatus();
+            bool success = manager->GetClassLinker()->EnsureInitialized(klass, true, true);
+            // TODO we detach transaction from runtime to indicate we quit the transactional
+            // mode which prevents the GC from visiting objects modified during the transaction.
+            // Ensure GC is not run so don't access freed objects when aborting transaction.
+            const char* old_cause = soa.Self()->StartAssertNoThreadSuspension("Transaction end");
+            runtime->ExitTransactionMode();
 
-              if (!success) {
-                CHECK(soa.Self()->IsExceptionPending());
-                ThrowLocation throw_location;
-                mirror::Throwable* exception = soa.Self()->GetException(&throw_location);
-                VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
-                               << exception->Dump();
-                soa.Self()->ClearException();
-                transaction.Abort();
-                CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
-              }
-              soa.Self()->EndAssertNoThreadSuspension(old_casue);
+            if (!success) {
+              CHECK(soa.Self()->IsExceptionPending());
+              ThrowLocation throw_location;
+              mirror::Throwable* exception = soa.Self()->GetException(&throw_location);
+              VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
+                  << exception->Dump();
+              soa.Self()->ClearException();
+              transaction.Abort();
+              CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
             }
+            soa.Self()->EndAssertNoThreadSuspension(old_cause);
           }
         }
         soa.Self()->AssertNoPendingException();
@@ -1812,8 +1824,9 @@
   DexToDexCompilationLevel dex_to_dex_compilation_level = kDontDexToDexCompile;
   {
     ScopedObjectAccess soa(Thread::Current());
-    SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
-                                              soa.Decode<mirror::ClassLoader*>(jclass_loader));
+    StackHandleScope<1> hs(soa.Self());
+    Handle<mirror::ClassLoader> class_loader(
+        hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
     dex_to_dex_compilation_level = GetDexToDexCompilationlevel(soa.Self(), class_loader, dex_file,
                                                                class_def);
   }
@@ -1883,7 +1896,7 @@
         (instruction_set_ == kX86_64 || instruction_set_ == kArm64)) {
       // Leaving this empty will trigger the generic JNI version
     } else {
-      compiled_method = compiler_->JniCompile(*this, access_flags, method_idx, dex_file);
+      compiled_method = compiler_->JniCompile(access_flags, method_idx, dex_file);
       CHECK(compiled_method != NULL);
     }
   } else if ((access_flags & kAccAbstract) != 0) {
@@ -1892,9 +1905,8 @@
     bool compile = verification_results_->IsCandidateForCompilation(method_ref, access_flags);
     if (compile) {
       // NOTE: if compiler declines to compile this method, it will return NULL.
-      compiled_method = compiler_->Compile(
-          *this, code_item, access_flags, invoke_type, class_def_idx,
-          method_idx, class_loader, dex_file);
+      compiled_method = compiler_->Compile(code_item, access_flags, invoke_type, class_def_idx,
+                                           method_idx, class_loader, dex_file);
     }
     if (compiled_method == nullptr && dex_to_dex_compilation_level != kDontDexToDexCompile) {
       // TODO: add a command-line option to disable DEX-to-DEX compilation ?
@@ -1992,7 +2004,7 @@
                               OatWriter* oat_writer,
                               art::File* file)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return compiler_->WriteElf(file, oat_writer, dex_files, android_root, is_host, *this);
+  return compiler_->WriteElf(file, oat_writer, dex_files, android_root, is_host);
 }
 void CompilerDriver::InstructionSetToLLVMTarget(InstructionSet instruction_set,
                                                 std::string* target_triple,
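Note: the compiler_driver.cc hunks above all follow the same migration pattern used throughout this change, where a SirtRef<T> rooted on the thread becomes a Handle<T> taken from an explicit StackHandleScope<N>. A minimal sketch of the new pattern is below; ProcessClassLoader is a hypothetical callee, not part of ART.

    // Sketch only: the StackHandleScope/Handle pattern these hunks migrate to.
    ScopedObjectAccess soa(Thread::Current());
    StackHandleScope<1> hs(soa.Self());                  // reserves one handle slot
    Handle<mirror::ClassLoader> class_loader(
        hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
    ProcessClassLoader(class_loader);                    // Handle<> is passed by value
    // All handles are released when hs goes out of scope.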
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index d7d40d5..14ccb50 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -52,7 +52,7 @@
 class OatWriter;
 class ParallelCompilationManager;
 class ScopedObjectAccess;
-template<class T> class SirtRef;
+template<class T> class Handle;
 class TimingLogger;
 class VerificationResults;
 class VerifiedMethod;
@@ -210,7 +210,7 @@
 
   bool CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx,
                           bool* is_type_initialized, bool* use_direct_type_ptr,
-                          uintptr_t* direct_type_ptr);
+                          uintptr_t* direct_type_ptr, bool* out_is_finalizable);
 
  // Get the DexCache for the given compilation unit.
   mirror::DexCache* GetDexCache(const DexCompilationUnit* mUnit)
@@ -221,15 +221,15 @@
 
   // Resolve compiling method's class. Returns nullptr on failure.
   mirror::Class* ResolveCompilingMethodsClass(
-      ScopedObjectAccess& soa, const SirtRef<mirror::DexCache>& dex_cache,
-      const SirtRef<mirror::ClassLoader>& class_loader, const DexCompilationUnit* mUnit)
+      ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+      Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a field. Returns nullptr on failure, including incompatible class change.
   // NOTE: Unlike ClassLinker's ResolveField(), this method enforces is_static.
   mirror::ArtField* ResolveField(
-      ScopedObjectAccess& soa, const SirtRef<mirror::DexCache>& dex_cache,
-      const SirtRef<mirror::ClassLoader>& class_loader, const DexCompilationUnit* mUnit,
+      ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+      Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
       uint32_t field_idx, bool is_static)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -258,8 +258,8 @@
 
   // Resolve a method. Returns nullptr on failure, including incompatible class change.
   mirror::ArtMethod* ResolveMethod(
-      ScopedObjectAccess& soa, const SirtRef<mirror::DexCache>& dex_cache,
-      const SirtRef<mirror::ClassLoader>& class_loader, const DexCompilationUnit* mUnit,
+      ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+      Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
       uint32_t method_idx, InvokeType invoke_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -277,8 +277,8 @@
   // Can we fast-path an INVOKE? If no, returns 0. If yes, returns a non-zero opaque flags value
   // for ProcessedInvoke() and computes the necessary lowering info.
   int IsFastInvoke(
-      ScopedObjectAccess& soa, const SirtRef<mirror::DexCache>& dex_cache,
-      const SirtRef<mirror::ClassLoader>& class_loader, const DexCompilationUnit* mUnit,
+      ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
+      Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
       mirror::Class* referrer_class, mirror::ArtMethod* resolved_method, InvokeType* invoke_type,
       MethodReference* target_method, const MethodReference* devirt_target,
       uintptr_t* direct_code, uintptr_t* direct_method)
@@ -688,7 +688,7 @@
   VerificationResults* const verification_results_;
   DexFileToMethodInlinerMap* const method_inliner_map_;
 
-  UniquePtr<Compiler> compiler_;
+  std::unique_ptr<Compiler> compiler_;
 
   const InstructionSet instruction_set_;
   const InstructionSetFeatures instruction_set_features_;
@@ -712,13 +712,13 @@
   // If image_ is true, specifies the classes that will be included in
   // the image. Note if image_classes_ is NULL, all classes are
   // included in the image.
-  UniquePtr<DescriptorSet> image_classes_;
+  std::unique_ptr<DescriptorSet> image_classes_;
 
   size_t thread_count_;
   uint64_t start_ns_;
 
   class AOTCompilationStats;
-  UniquePtr<AOTCompilationStats> stats_;
+  std::unique_ptr<AOTCompilationStats> stats_;
 
   bool dump_stats_;
   const bool dump_passes_;
@@ -755,7 +755,7 @@
   bool support_boot_image_fixup_;
 
   // Call Frame Information, which might be generated to help stack tracebacks.
-  UniquePtr<std::vector<uint8_t> > cfi_info_;
+  std::unique_ptr<std::vector<uint8_t>> cfi_info_;
 
   // DeDuplication data structures, these own the corresponding byte arrays.
   class DedupeHashFunc {
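One reason the signatures above switch from const SirtRef<T>& to Handle<T> by value: a Handle is only a thin wrapper around a pointer to its slot in the enclosing StackHandleScope, so copies are cheap and every Get() re-reads the slot that the GC may have updated. A rough sketch of that shape, illustrative only and not the real art::Handle:

    template <typename T>
    class HandleSketch {
     public:
      explicit HandleSketch(T** slot) : slot_(slot) {}
      T* Get() const { return *slot_; }  // re-reads the slot, so a moved object is still seen
     private:
      T** slot_;                         // points into the owning handle scope
    };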
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 86034c8..964dfeb 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -18,8 +18,8 @@
 
 #include <stdint.h>
 #include <stdio.h>
+#include <memory>
 
-#include "UniquePtr.h"
 #include "class_linker.h"
 #include "common_compiler_test.h"
 #include "dex_file.h"
@@ -30,7 +30,7 @@
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
-#include "sirt_ref-inl.h"
+#include "handle_scope-inl.h"
 
 namespace art {
 
@@ -80,7 +80,9 @@
       const DexFile::ClassDef& class_def = dex_file.GetClassDef(i);
       const char* descriptor = dex_file.GetClassDescriptor(class_def);
       ScopedObjectAccess soa(Thread::Current());
-      SirtRef<mirror::ClassLoader> loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(class_loader));
+      StackHandleScope<1> hs(soa.Self());
+      Handle<mirror::ClassLoader> loader(
+          hs.NewHandle(soa.Decode<mirror::ClassLoader*>(class_loader)));
       mirror::Class* c = class_linker->FindClass(soa.Self(), descriptor, loader);
       CHECK(c != NULL);
       for (size_t i = 0; i < c->NumDirectMethods(); i++) {
@@ -150,9 +152,9 @@
   jobject class_loader;
   {
     ScopedObjectAccess soa(Thread::Current());
-    SirtRef<mirror::ClassLoader> null_loader(soa.Self(), nullptr);
-    CompileVirtualMethod(null_loader, "java.lang.Class", "isFinalizable", "()Z");
-    CompileDirectMethod(null_loader, "java.lang.Object", "<init>", "()V");
+    CompileVirtualMethod(NullHandle<mirror::ClassLoader>(), "java.lang.Class", "isFinalizable",
+                         "()Z");
+    CompileDirectMethod(NullHandle<mirror::ClassLoader>(), "java.lang.Object", "<init>", "()V");
     class_loader = LoadDex("AbstractMethod");
   }
   ASSERT_TRUE(class_loader != NULL);
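The NullHandle<mirror::ClassLoader>() calls above replace SirtRefs that only ever held nullptr: a NullHandle behaves as a Handle whose Get() yields nullptr, so no StackHandleScope slot is needed just to say "no class loader". Illustrative use, with CompileWithLoader as a hypothetical helper taking a Handle<mirror::ClassLoader>:

    CompileWithLoader(soa.Self(), "java.lang.Object", NullHandle<mirror::ClassLoader>());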
diff --git a/compiler/elf_fixup.cc b/compiler/elf_fixup.cc
index 6fd4a73..404e3f8 100644
--- a/compiler/elf_fixup.cc
+++ b/compiler/elf_fixup.cc
@@ -17,12 +17,12 @@
 #include "elf_fixup.h"
 
 #include <inttypes.h>
+#include <memory>
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
 #include "elf_file.h"
 #include "elf_writer.h"
-#include "UniquePtr.h"
 
 namespace art {
 
@@ -30,7 +30,7 @@
 
 bool ElfFixup::Fixup(File* file, uintptr_t oat_data_begin) {
   std::string error_msg;
-  UniquePtr<ElfFile> elf_file(ElfFile::Open(file, true, false, &error_msg));
+  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file, true, false, &error_msg));
   CHECK(elf_file.get() != nullptr) << error_msg;
 
   // Lookup "oatdata" symbol address.
diff --git a/compiler/elf_stripper.cc b/compiler/elf_stripper.cc
index 42291b2..8c06c9f 100644
--- a/compiler/elf_stripper.cc
+++ b/compiler/elf_stripper.cc
@@ -18,9 +18,9 @@
 
 #include <unistd.h>
 #include <sys/types.h>
+#include <memory>
 #include <vector>
 
-#include "UniquePtr.h"
 #include "base/logging.h"
 #include "elf_file.h"
 #include "elf_utils.h"
@@ -29,7 +29,7 @@
 namespace art {
 
 bool ElfStripper::Strip(File* file, std::string* error_msg) {
-  UniquePtr<ElfFile> elf_file(ElfFile::Open(file, true, false, error_msg));
+  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file, true, false, error_msg));
   if (elf_file.get() == nullptr) {
     return false;
   }
diff --git a/compiler/elf_writer.cc b/compiler/elf_writer.cc
index ccc26a1..4c093c7 100644
--- a/compiler/elf_writer.cc
+++ b/compiler/elf_writer.cc
@@ -42,7 +42,7 @@
                                      size_t& oat_loaded_size,
                                      size_t& oat_data_offset) {
   std::string error_msg;
-  UniquePtr<ElfFile> elf_file(ElfFile::Open(file, false, false, &error_msg));
+  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file, false, false, &error_msg));
   CHECK(elf_file.get() != NULL) << error_msg;
 
   oat_loaded_size = elf_file->GetLoadedSize();
diff --git a/compiler/elf_writer_mclinker.cc b/compiler/elf_writer_mclinker.cc
index f688103..3dba426 100644
--- a/compiler/elf_writer_mclinker.cc
+++ b/compiler/elf_writer_mclinker.cc
@@ -159,7 +159,7 @@
 void ElfWriterMclinker::AddOatInput(std::vector<uint8_t>& oat_contents) {
   // Add an artificial memory input. Based on LinkerTest.
   std::string error_msg;
-  UniquePtr<OatFile> oat_file(OatFile::OpenMemory(oat_contents, elf_file_->GetPath(), &error_msg));
+  std::unique_ptr<OatFile> oat_file(OatFile::OpenMemory(oat_contents, elf_file_->GetPath(), &error_msg));
   CHECK(oat_file.get() != NULL) << elf_file_->GetPath() << ": " << error_msg;
 
   const char* oat_data_start = reinterpret_cast<const char*>(&oat_file->GetOatHeader());
@@ -347,7 +347,7 @@
 
 void ElfWriterMclinker::FixupOatMethodOffsets(const std::vector<const DexFile*>& dex_files) {
   std::string error_msg;
-  UniquePtr<ElfFile> elf_file(ElfFile::Open(elf_file_, true, false, &error_msg));
+  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(elf_file_, true, false, &error_msg));
   CHECK(elf_file.get() != NULL) << elf_file_->GetPath() << ": " << error_msg;
 
   uint32_t oatdata_address = GetOatDataAddress(elf_file.get());
@@ -361,9 +361,11 @@
       ClassLinker* linker = Runtime::Current()->GetClassLinker();
       // Unchecked as we hold mutator_lock_ on entry.
       ScopedObjectAccessUnchecked soa(Thread::Current());
-      SirtRef<mirror::DexCache> dex_cache(soa.Self(), linker->FindDexCache(dex_file));
-      SirtRef<mirror::ClassLoader> class_loader(soa.Self(), nullptr);
-      method = linker->ResolveMethod(dex_file, method_idx, dex_cache, class_loader, NULL, invoke_type);
+      StackHandleScope<1> hs(soa.Self());
+      Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(dex_file)));
+      method = linker->ResolveMethod(dex_file, method_idx, dex_cache,
+                                     NullHandle<mirror::ClassLoader>(),
+                                     NullHandle<mirror::ArtMethod>(), invoke_type);
       CHECK(method != NULL);
     }
     const CompiledMethod* compiled_method =
diff --git a/compiler/elf_writer_mclinker.h b/compiler/elf_writer_mclinker.h
index 13757ed..955e5d2 100644
--- a/compiler/elf_writer_mclinker.h
+++ b/compiler/elf_writer_mclinker.h
@@ -17,9 +17,9 @@
 #ifndef ART_COMPILER_ELF_WRITER_MCLINKER_H_
 #define ART_COMPILER_ELF_WRITER_MCLINKER_H_
 
-#include "elf_writer.h"
+#include <memory>
 
-#include "UniquePtr.h"
+#include "elf_writer.h"
 #include "safe_map.h"
 
 namespace mcld {
@@ -73,11 +73,11 @@
                                    const CompiledCode& compiled_code);
 
   // Setup by Init()
-  UniquePtr<mcld::LinkerConfig> linker_config_;
-  UniquePtr<mcld::LinkerScript> linker_script_;
-  UniquePtr<mcld::Module> module_;
-  UniquePtr<mcld::IRBuilder> ir_builder_;
-  UniquePtr<mcld::Linker> linker_;
+  std::unique_ptr<mcld::LinkerConfig> linker_config_;
+  std::unique_ptr<mcld::LinkerScript> linker_script_;
+  std::unique_ptr<mcld::Module> module_;
+  std::unique_ptr<mcld::IRBuilder> ir_builder_;
+  std::unique_ptr<mcld::Linker> linker_;
 
   // Setup by AddOatInput()
   // TODO: ownership of oat_input_?
diff --git a/compiler/elf_writer_test.cc b/compiler/elf_writer_test.cc
index 864dadc..e637cfb 100644
--- a/compiler/elf_writer_test.cc
+++ b/compiler/elf_writer_test.cc
@@ -44,18 +44,15 @@
   } while (false)
 
 TEST_F(ElfWriterTest, dlsym) {
-  std::string elf_filename;
+  std::string elf_location;
   if (IsHost()) {
     const char* host_dir = getenv("ANDROID_HOST_OUT");
     CHECK(host_dir != NULL);
-    elf_filename = StringPrintf("%s/framework/core.oat", host_dir);
+    elf_location = StringPrintf("%s/framework/core.oat", host_dir);
   } else {
-#ifdef __LP64__
-    elf_filename = "/data/art-test64/core.oat";
-#else
-    elf_filename = "/data/art-test/core.oat";
-#endif
+    elf_location = "/data/art-test/core.oat";
   }
+  std::string elf_filename = GetSystemImageFilename(elf_location.c_str(), kRuntimeISA);
   LOG(INFO) << "elf_filename=" << elf_filename;
 
   UnreserveImageSpace();
@@ -85,11 +82,11 @@
   }
 #endif
 
-  UniquePtr<File> file(OS::OpenFileForReading(elf_filename.c_str()));
+  std::unique_ptr<File> file(OS::OpenFileForReading(elf_filename.c_str()));
   ASSERT_TRUE(file.get() != NULL);
   {
     std::string error_msg;
-    UniquePtr<ElfFile> ef(ElfFile::Open(file.get(), false, false, &error_msg));
+    std::unique_ptr<ElfFile> ef(ElfFile::Open(file.get(), false, false, &error_msg));
     CHECK(ef.get() != nullptr) << error_msg;
     EXPECT_ELF_FILE_ADDRESS(ef, dl_oatdata, "oatdata", false);
     EXPECT_ELF_FILE_ADDRESS(ef, dl_oatexec, "oatexec", false);
@@ -97,7 +94,7 @@
   }
   {
     std::string error_msg;
-    UniquePtr<ElfFile> ef(ElfFile::Open(file.get(), false, false, &error_msg));
+    std::unique_ptr<ElfFile> ef(ElfFile::Open(file.get(), false, false, &error_msg));
     CHECK(ef.get() != nullptr) << error_msg;
     EXPECT_ELF_FILE_ADDRESS(ef, dl_oatdata, "oatdata", true);
     EXPECT_ELF_FILE_ADDRESS(ef, dl_oatexec, "oatexec", true);
@@ -105,7 +102,7 @@
   }
   {
     std::string error_msg;
-    UniquePtr<ElfFile> ef(ElfFile::Open(file.get(), false, true, &error_msg));
+    std::unique_ptr<ElfFile> ef(ElfFile::Open(file.get(), false, true, &error_msg));
     CHECK(ef.get() != nullptr) << error_msg;
     CHECK(ef->Load(false, &error_msg)) << error_msg;
     EXPECT_EQ(dl_oatdata, ef->FindDynamicSymbolAddress("oatdata"));
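The elf_writer_test change above now derives the ISA-specific filename from a generic location with GetSystemImageFilename(), which inserts the instruction-set directory before the basename. Roughly, with illustrative paths:

    // elf_location                              ISA    elf_filename
    // "/data/art-test/core.oat"                 arm    "/data/art-test/arm/core.oat"
    // "$ANDROID_HOST_OUT/framework/core.oat"    x86    "$ANDROID_HOST_OUT/framework/x86/core.oat"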
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 7c5741b..92be147 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -16,6 +16,7 @@
 
 #include "image.h"
 
+#include <memory>
 #include <string>
 #include <vector>
 
@@ -27,7 +28,6 @@
 #include "lock_word.h"
 #include "mirror/object-inl.h"
 #include "signal_catcher.h"
-#include "UniquePtr.h"
 #include "utils.h"
 #include "vector_output_stream.h"
 
@@ -42,9 +42,23 @@
 };
 
 TEST_F(ImageTest, WriteRead) {
-  // Create a root tmp file, to be the base of the .art and .oat temporary files.
-  ScratchFile tmp;
-  ScratchFile tmp_elf(tmp, "oat");
+  // Create a tmp file for the generic image location, used as the base of the .art and .oat temporary files.
+  ScratchFile location;
+  ScratchFile image_location(location, ".art");
+
+  std::string image_filename(GetSystemImageFilename(image_location.GetFilename().c_str(),
+                                                    kRuntimeISA));
+  size_t pos = image_filename.rfind('/');
+  CHECK_NE(pos, std::string::npos) << image_filename;
+  std::string image_dir(image_filename, 0, pos);
+  int mkdir_result = mkdir(image_dir.c_str(), 0700);
+  CHECK_EQ(0, mkdir_result) << image_dir;
+  ScratchFile image_file(OS::CreateEmptyFile(image_filename.c_str()));
+
+  std::string oat_filename(image_filename, 0, image_filename.size() - 3);
+  oat_filename += "oat";
+  ScratchFile oat_file(OS::CreateEmptyFile(oat_filename.c_str()));
+
   {
     {
       jobject class_loader = NULL;
@@ -68,28 +82,27 @@
                                                 !kIsTargetBuild,
                                                 class_linker->GetBootClassPath(),
                                                 &oat_writer,
-                                                tmp_elf.GetFile());
+                                                oat_file.GetFile());
       ASSERT_TRUE(success);
       timings.EndSplit();
     }
   }
-  // Workound bug that mcld::Linker::emit closes tmp_elf by reopening as tmp_oat.
-  UniquePtr<File> tmp_oat(OS::OpenFileReadWrite(tmp_elf.GetFilename().c_str()));
-  ASSERT_TRUE(tmp_oat.get() != NULL);
+  // Work around a bug where mcld::Linker::emit closes oat_file, by reopening it as dup_oat.
+  std::unique_ptr<File> dup_oat(OS::OpenFileReadWrite(oat_file.GetFilename().c_str()));
+  ASSERT_TRUE(dup_oat.get() != NULL);
 
-  ScratchFile tmp_image(tmp, "art");
   const uintptr_t requested_image_base = ART_BASE_ADDRESS;
   {
     ImageWriter writer(*compiler_driver_.get());
-    bool success_image = writer.Write(tmp_image.GetFilename(), requested_image_base,
-                                      tmp_oat->GetPath(), tmp_oat->GetPath());
+    bool success_image = writer.Write(image_file.GetFilename(), requested_image_base,
+                                      dup_oat->GetPath(), dup_oat->GetPath());
     ASSERT_TRUE(success_image);
-    bool success_fixup = ElfFixup::Fixup(tmp_oat.get(), writer.GetOatDataBegin());
+    bool success_fixup = ElfFixup::Fixup(dup_oat.get(), writer.GetOatDataBegin());
     ASSERT_TRUE(success_fixup);
   }
 
   {
-    UniquePtr<File> file(OS::OpenFileForReading(tmp_image.GetFilename().c_str()));
+    std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
     ASSERT_TRUE(file.get() != NULL);
     ImageHeader image_header;
     file->ReadFully(&image_header, sizeof(image_header));
@@ -117,7 +130,7 @@
   java_lang_dex_file_ = NULL;
 
   std::string error_msg;
-  UniquePtr<const DexFile> dex(DexFile::Open(GetLibCoreDexFileName().c_str(),
+  std::unique_ptr<const DexFile> dex(DexFile::Open(GetLibCoreDexFileName().c_str(),
                                              GetLibCoreDexFileName().c_str(),
                                              &error_msg));
   ASSERT_TRUE(dex.get() != nullptr) << error_msg;
@@ -127,7 +140,7 @@
 
   Runtime::Options options;
   std::string image("-Ximage:");
-  image.append(tmp_image.GetFilename());
+  image.append(image_location.GetFilename());
   options.push_back(std::make_pair(image.c_str(), reinterpret_cast<void*>(NULL)));
 
   if (!Runtime::Create(options, false)) {
@@ -166,6 +179,11 @@
     }
     EXPECT_TRUE(Monitor::IsValidLockWord(klass->GetLockWord(false)));
   }
+
+  image_file.Unlink();
+  oat_file.Unlink();
+  int rmdir_result = rmdir(image_dir.c_str());
+  CHECK_EQ(0, rmdir_result);
 }
 
 TEST_F(ImageTest, ImageHeaderIsValid) {
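Note how the WriteRead test above separates image location from image filename: the ISA-specific files are written under an arch subdirectory, but the runtime is started with the generic location via -Ximage and resolves the arch itself. Illustrative values only:

    // image_location  (passed via -Ximage:)   /tmp/.../image-XYZ.art
    // image_filename  (actually written)      /tmp/.../<isa>/image-XYZ.art
    // oat_filename    (same base, .oat)       /tmp/.../<isa>/image-XYZ.oat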
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index e6d983f..be53926 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -18,6 +18,7 @@
 
 #include <sys/stat.h>
 
+#include <memory>
 #include <vector>
 
 #include "base/logging.h"
@@ -51,8 +52,7 @@
 #include "object_utils.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
-#include "sirt_ref-inl.h"
-#include "UniquePtr.h"
+#include "handle_scope-inl.h"
 #include "utils.h"
 
 using ::art::mirror::ArtField;
@@ -77,7 +77,7 @@
 
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
-  UniquePtr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str()));
+  std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str()));
   if (oat_file.get() == NULL) {
     LOG(ERROR) << "Failed to open oat file " << oat_filename << " for " << oat_location;
     return false;
@@ -141,7 +141,7 @@
   PatchOatCodeAndMethods();
   Thread::Current()->TransitionFromRunnableToSuspended(kNative);
 
-  UniquePtr<File> image_file(OS::CreateEmptyFile(image_filename.c_str()));
+  std::unique_ptr<File> image_file(OS::CreateEmptyFile(image_filename.c_str()));
   ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
   if (image_file.get() == NULL) {
     LOG(ERROR) << "Failed to open image file " << image_filename;
@@ -251,7 +251,9 @@
 }
 
 bool ImageWriter::ComputeLazyFieldsForClassesVisitor(Class* c, void* /*arg*/) {
-  c->ComputeName();
+  Thread* self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  mirror::Class::ComputeName(hs.NewHandle(c));
   return true;
 }
 
@@ -285,7 +287,7 @@
 }
 
 bool ImageWriter::IsImageClass(Class* klass) {
-  return compiler_driver_.IsImageClass(ClassHelper(klass).GetDescriptor());
+  return compiler_driver_.IsImageClass(klass->GetDescriptor().c_str());
 }
 
 struct NonImageClasses {
@@ -339,7 +341,7 @@
 bool ImageWriter::NonImageClassesVisitor(Class* klass, void* arg) {
   NonImageClasses* context = reinterpret_cast<NonImageClasses*>(arg);
   if (!context->image_writer->IsImageClass(klass)) {
-    context->non_image_classes->insert(ClassHelper(klass).GetDescriptor());
+    context->non_image_classes->insert(klass->GetDescriptor());
   }
   return true;
 }
@@ -359,7 +361,7 @@
     Class* klass = obj->AsClass();
     if (!image_writer->IsImageClass(klass)) {
       image_writer->DumpImageClasses();
-      CHECK(image_writer->IsImageClass(klass)) << ClassHelper(klass).GetDescriptor()
+      CHECK(image_writer->IsImageClass(klass)) << klass->GetDescriptor()
                                                << " " << PrettyDescriptor(klass);
     }
   }
@@ -382,16 +384,14 @@
       DCHECK_EQ(obj, obj->AsString()->Intern());
       return;
     }
-    Thread* self = Thread::Current();
-    SirtRef<Object> sirt_obj(self, obj);
-    mirror::String* interned = obj->AsString()->Intern();
-    if (sirt_obj.get() != interned) {
+    mirror::String* const interned = obj->AsString()->Intern();
+    if (obj != interned) {
       if (!IsImageOffsetAssigned(interned)) {
         // interned obj is after us, allocate its location early
         AssignImageOffset(interned);
       }
       // point those looking for this object to the interned version.
-      SetImageOffset(sirt_obj.get(), GetImageOffset(interned));
+      SetImageOffset(obj, GetImageOffset(interned));
       return;
     }
     // else (obj == interned), nothing to do but fall through to the normal case
@@ -404,20 +404,22 @@
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
   Thread* self = Thread::Current();
-  SirtRef<Class> object_array_class(self, class_linker->FindSystemClass(self,
-                                                                        "[Ljava/lang/Object;"));
+  StackHandleScope<3> hs(self);
+  Handle<Class> object_array_class(hs.NewHandle(
+      class_linker->FindSystemClass(self, "[Ljava/lang/Object;")));
 
   // build an Object[] of all the DexCaches used in the source_space_
-  ObjectArray<Object>* dex_caches = ObjectArray<Object>::Alloc(self, object_array_class.get(),
-                                                               class_linker->GetDexCaches().size());
+  Handle<ObjectArray<Object>> dex_caches(
+      hs.NewHandle(ObjectArray<Object>::Alloc(self, object_array_class.Get(),
+                                              class_linker->GetDexCaches().size())));
   int i = 0;
   for (DexCache* dex_cache : class_linker->GetDexCaches()) {
     dex_caches->Set<false>(i++, dex_cache);
   }
 
   // build an Object[] of the roots needed to restore the runtime
-  SirtRef<ObjectArray<Object> > image_roots(
-      self, ObjectArray<Object>::Alloc(self, object_array_class.get(), ImageHeader::kImageRootsMax));
+  Handle<ObjectArray<Object>> image_roots(hs.NewHandle(
+      ObjectArray<Object>::Alloc(self, object_array_class.Get(), ImageHeader::kImageRootsMax)));
   image_roots->Set<false>(ImageHeader::kResolutionMethod, runtime->GetResolutionMethod());
   image_roots->Set<false>(ImageHeader::kImtConflictMethod, runtime->GetImtConflictMethod());
   image_roots->Set<false>(ImageHeader::kDefaultImt, runtime->GetDefaultImt());
@@ -427,27 +429,28 @@
                           runtime->GetCalleeSaveMethod(Runtime::kRefsOnly));
   image_roots->Set<false>(ImageHeader::kRefsAndArgsSaveMethod,
                           runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs));
-  image_roots->Set<false>(ImageHeader::kDexCaches, dex_caches);
+  image_roots->Set<false>(ImageHeader::kDexCaches, dex_caches.Get());
   image_roots->Set<false>(ImageHeader::kClassRoots, class_linker->GetClassRoots());
   for (int i = 0; i < ImageHeader::kImageRootsMax; i++) {
     CHECK(image_roots->Get(i) != NULL);
   }
-  return image_roots.get();
+  return image_roots.Get();
 }
 
 // Walk instance fields of the given Class. Separate function to allow recursion on the super
 // class.
 void ImageWriter::WalkInstanceFields(mirror::Object* obj, mirror::Class* klass) {
   // Visit fields of parent classes first.
-  SirtRef<mirror::Class> sirt_class(Thread::Current(), klass);
-  mirror::Class* super = sirt_class->GetSuperClass();
+  StackHandleScope<1> hs(Thread::Current());
+  Handle<mirror::Class> h_class(hs.NewHandle(klass));
+  mirror::Class* super = h_class->GetSuperClass();
   if (super != nullptr) {
     WalkInstanceFields(obj, super);
   }
   //
-  size_t num_reference_fields = sirt_class->NumReferenceInstanceFields();
+  size_t num_reference_fields = h_class->NumReferenceInstanceFields();
   for (size_t i = 0; i < num_reference_fields; ++i) {
-    mirror::ArtField* field = sirt_class->GetInstanceField(i);
+    mirror::ArtField* field = h_class->GetInstanceField(i);
     MemberOffset field_offset = field->GetOffset();
     mirror::Object* value = obj->GetFieldObject<mirror::Object>(field_offset);
     if (value != nullptr) {
@@ -460,28 +463,28 @@
 void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) {
   if (!IsImageOffsetAssigned(obj)) {
     // Walk instance fields of all objects
-    Thread* self = Thread::Current();
-    SirtRef<mirror::Object> sirt_obj(self, obj);
-    SirtRef<mirror::Class> klass(self, obj->GetClass());
+    StackHandleScope<2> hs(Thread::Current());
+    Handle<mirror::Object> h_obj(hs.NewHandle(obj));
+    Handle<mirror::Class> klass(hs.NewHandle(obj->GetClass()));
     // visit the object itself.
-    CalculateObjectOffsets(sirt_obj.get());
-    WalkInstanceFields(sirt_obj.get(), klass.get());
+    CalculateObjectOffsets(h_obj.Get());
+    WalkInstanceFields(h_obj.Get(), klass.Get());
     // Walk static fields of a Class.
-    if (sirt_obj->IsClass()) {
+    if (h_obj->IsClass()) {
       size_t num_static_fields = klass->NumReferenceStaticFields();
       for (size_t i = 0; i < num_static_fields; ++i) {
         mirror::ArtField* field = klass->GetStaticField(i);
         MemberOffset field_offset = field->GetOffset();
-        mirror::Object* value = sirt_obj->GetFieldObject<mirror::Object>(field_offset);
+        mirror::Object* value = h_obj->GetFieldObject<mirror::Object>(field_offset);
         if (value != nullptr) {
           WalkFieldsInOrder(value);
         }
       }
-    } else if (sirt_obj->IsObjectArray()) {
+    } else if (h_obj->IsObjectArray()) {
       // Walk elements of an object array.
-      int32_t length = sirt_obj->AsObjectArray<mirror::Object>()->GetLength();
+      int32_t length = h_obj->AsObjectArray<mirror::Object>()->GetLength();
       for (int32_t i = 0; i < length; i++) {
-        mirror::ObjectArray<mirror::Object>* obj_array = sirt_obj->AsObjectArray<mirror::Object>();
+        mirror::ObjectArray<mirror::Object>* obj_array = h_obj->AsObjectArray<mirror::Object>();
         mirror::Object* value = obj_array->Get(i);
         if (value != nullptr) {
           WalkFieldsInOrder(value);
@@ -500,7 +503,8 @@
 void ImageWriter::CalculateNewObjectOffsets(size_t oat_loaded_size, size_t oat_data_offset) {
   CHECK_NE(0U, oat_loaded_size);
   Thread* self = Thread::Current();
-  SirtRef<ObjectArray<Object> > image_roots(self, CreateImageRoots());
+  StackHandleScope<1> hs(self);
+  Handle<ObjectArray<Object>> image_roots(hs.NewHandle(CreateImageRoots()));
 
   gc::Heap* heap = Runtime::Current()->GetHeap();
   DCHECK_EQ(0U, image_end_);
@@ -533,7 +537,7 @@
                            static_cast<uint32_t>(image_end_),
                            RoundUp(image_end_, kPageSize),
                            RoundUp(bitmap_bytes, kPageSize),
-                           PointerToLowMemUInt32(GetImageAddress(image_roots.get())),
+                           PointerToLowMemUInt32(GetImageAddress(image_roots.Get())),
                            oat_file_->GetOatHeader().GetChecksum(),
                            PointerToLowMemUInt32(oat_file_begin),
                            PointerToLowMemUInt32(oat_data_begin_),
@@ -587,7 +591,7 @@
 
   void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
-    Object* ref = obj->GetFieldObject<Object, kVerifyNone, false>(offset);
+    Object* ref = obj->GetFieldObject<Object, kVerifyNone>(offset);
     // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
     // image.
     copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
@@ -691,14 +695,14 @@
 static ArtMethod* GetTargetMethod(const CompilerDriver::CallPatchInformation* patch)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  Thread* self = Thread::Current();
-  SirtRef<mirror::DexCache> dex_cache(self, class_linker->FindDexCache(*patch->GetTargetDexFile()));
-  SirtRef<mirror::ClassLoader> class_loader(self, nullptr);
+  StackHandleScope<1> hs(Thread::Current());
+  Handle<mirror::DexCache> dex_cache(
+      hs.NewHandle(class_linker->FindDexCache(*patch->GetTargetDexFile())));
   ArtMethod* method = class_linker->ResolveMethod(*patch->GetTargetDexFile(),
                                                   patch->GetTargetMethodIdx(),
                                                   dex_cache,
-                                                  class_loader,
-                                                  NULL,
+                                                  NullHandle<mirror::ClassLoader>(),
+                                                  NullHandle<mirror::ArtMethod>(),
                                                   patch->GetTargetInvokeType());
   CHECK(method != NULL)
     << patch->GetTargetDexFile()->GetLocation() << " " << patch->GetTargetMethodIdx();
@@ -714,13 +718,10 @@
 static Class* GetTargetType(const CompilerDriver::TypePatchInformation* patch)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  Thread* self = Thread::Current();
-  SirtRef<mirror::DexCache> dex_cache(self, class_linker->FindDexCache(patch->GetDexFile()));
-  SirtRef<mirror::ClassLoader> class_loader(self, nullptr);
-  Class* klass = class_linker->ResolveType(patch->GetDexFile(),
-                                           patch->GetTargetTypeIdx(),
-                                           dex_cache,
-                                           class_loader);
+  StackHandleScope<2> hs(Thread::Current());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(patch->GetDexFile())));
+  Class* klass = class_linker->ResolveType(patch->GetDexFile(), patch->GetTargetTypeIdx(),
+                                           dex_cache, NullHandle<mirror::ClassLoader>());
   CHECK(klass != NULL)
     << patch->GetDexFile().GetLocation() << " " << patch->GetTargetTypeIdx();
   CHECK(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx()) == klass)
@@ -754,7 +755,8 @@
       uintptr_t value = quick_code - patch_location + patch->RelativeOffset();
       SetPatchLocation(patch, value);
     } else {
-      if (quick_code == reinterpret_cast<uintptr_t>(GetQuickToInterpreterBridge())) {
+      if (quick_code == reinterpret_cast<uintptr_t>(GetQuickToInterpreterBridge()) ||
+          quick_code == reinterpret_cast<uintptr_t>(class_linker->GetQuickGenericJniTrampoline())) {
         if (target->IsNative()) {
           // generic JNI, not interpreter bridge from GetQuickOatCodeFor().
           code_offset = quick_generic_jni_trampoline_offset_;
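A detail worth noting in the image_writer.cc hunks above: the StackHandleScope<N> template argument is the number of slots the scope reserves, so it must cover every NewHandle() made through it, as the StackHandleScope<3> in CreateImageRoots does for the object-array class, dex_caches and image_roots. A minimal sketch, where some_class and some_object are hypothetical:

    StackHandleScope<2> hs(Thread::Current());              // <2> == two slots reserved
    Handle<mirror::Class> klass(hs.NewHandle(some_class));  // slot 0
    Handle<mirror::Object> root(hs.NewHandle(some_object)); // slot 1, covered by <2>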
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 7e22a96..aff155a 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -20,6 +20,7 @@
 #include <stdint.h>
 
 #include <cstddef>
+#include <memory>
 #include <set>
 #include <string>
 
@@ -30,7 +31,6 @@
 #include "os.h"
 #include "safe_map.h"
 #include "gc/space/space.h"
-#include "UniquePtr.h"
 
 namespace art {
 
@@ -161,7 +161,7 @@
   OatFile* oat_file_;
 
   // Memory mapped for generating the image.
-  UniquePtr<MemMap> image_;
+  std::unique_ptr<MemMap> image_;
 
   // Offset to the free space in image_.
   size_t image_end_;
@@ -170,13 +170,13 @@
   byte* image_begin_;
 
   // Saved hashes (objects are inside of the image so that they don't move).
-  std::vector<std::pair<mirror::Object*, uint32_t> > saved_hashes_;
+  std::vector<std::pair<mirror::Object*, uint32_t>> saved_hashes_;
 
   // Beginning target oat address for the pointers from the output image to its oat file.
   const byte* oat_data_begin_;
 
   // Image bitmap which lets us know where the objects inside of the image reside.
-  UniquePtr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_;
+  std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_;
 
   // Offset from oat_data_begin_ to the stubs.
   uint32_t interpreter_to_interpreter_bridge_offset_;
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 6b5e55e..9927fe1 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <memory>
+
 #include "class_linker.h"
 #include "common_compiler_test.h"
 #include "dex_file.h"
@@ -31,7 +33,6 @@
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
-#include "UniquePtr.h"
 
 extern "C" JNIEXPORT jint JNICALL Java_MyClassNatives_bar(JNIEnv*, jobject, jint count) {
   return count + 1;
@@ -48,7 +49,9 @@
   void CompileForTest(jobject class_loader, bool direct,
                       const char* method_name, const char* method_sig) {
     ScopedObjectAccess soa(Thread::Current());
-    SirtRef<mirror::ClassLoader> loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(class_loader));
+    StackHandleScope<1> hs(soa.Self());
+    Handle<mirror::ClassLoader> loader(
+        hs.NewHandle(soa.Decode<mirror::ClassLoader*>(class_loader)));
     // Compile the native method before starting the runtime
     mirror::Class* c = class_linker_->FindClass(soa.Self(), "LMyClassNatives;", loader);
     mirror::ArtMethod* method;
@@ -153,8 +156,9 @@
 
   ScopedObjectAccess soa(Thread::Current());
   std::string reason;
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
-                                            soa.Decode<mirror::ClassLoader*>(class_loader_));
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(class_loader_)));
   ASSERT_TRUE(
       Runtime::Current()->GetJavaVM()->LoadNativeLibrary("", class_loader, &reason)) << reason;
 
@@ -169,8 +173,9 @@
 
   ScopedObjectAccess soa(Thread::Current());
   std::string reason;
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
-                                            soa.Decode<mirror::ClassLoader*>(class_loader_));
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(class_loader_)));
   ASSERT_TRUE(
       Runtime::Current()->GetJavaVM()->LoadNativeLibrary("", class_loader, &reason)) << reason;
 
diff --git a/compiler/jni/portable/jni_compiler.cc b/compiler/jni/portable/jni_compiler.cc
index 0c14346..d2f54f8 100644
--- a/compiler/jni/portable/jni_compiler.cc
+++ b/compiler/jni/portable/jni_compiler.cc
@@ -98,7 +98,7 @@
   arg_begin = arg_iter;
 
   // Count the number of Object* arguments
-  uint32_t sirt_size = 1;
+  uint32_t handle_scope_size = 1;
   // "this" object pointer for non-static
   // "class" object pointer for static
   for (unsigned i = 0; arg_iter != arg_end; ++i, ++arg_iter) {
@@ -106,12 +106,12 @@
     arg_iter->setName(StringPrintf("a%u", i));
 #endif
     if (arg_iter->getType() == irb_.getJObjectTy()) {
-      ++sirt_size;
+      ++handle_scope_size;
     }
   }
 
   // Shadow stack
-  ::llvm::StructType* shadow_frame_type = irb_.getShadowFrameTy(sirt_size);
+  ::llvm::StructType* shadow_frame_type = irb_.getShadowFrameTy(handle_scope_size);
   ::llvm::AllocaInst* shadow_frame_ = irb_.CreateAlloca(shadow_frame_type);
 
   // Store the dex pc
@@ -123,7 +123,7 @@
   // Push the shadow frame
   ::llvm::Value* shadow_frame_upcast = irb_.CreateConstGEP2_32(shadow_frame_, 0, 0);
   ::llvm::Value* old_shadow_frame =
-      irb_.Runtime().EmitPushShadowFrame(shadow_frame_upcast, method_object_addr, sirt_size);
+      irb_.Runtime().EmitPushShadowFrame(shadow_frame_upcast, method_object_addr, handle_scope_size);
 
   // Get JNIEnv
   ::llvm::Value* jni_env_object_addr =
@@ -148,35 +148,35 @@
   // Variables for GetElementPtr
   ::llvm::Value* gep_index[] = {
     irb_.getInt32(0),  // No displacement for shadow frame pointer
-    irb_.getInt32(1),  // SIRT
+    irb_.getInt32(1),  // handle scope
     NULL,
   };
 
-  size_t sirt_member_index = 0;
+  size_t handle_scope_member_index = 0;
 
-  // Store the "this object or class object" to SIRT
-  gep_index[2] = irb_.getInt32(sirt_member_index++);
-  ::llvm::Value* sirt_field_addr = irb_.CreateBitCast(irb_.CreateGEP(shadow_frame_, gep_index),
+  // Store the "this object or class object" to handle scope
+  gep_index[2] = irb_.getInt32(handle_scope_member_index++);
+  ::llvm::Value* handle_scope_field_addr = irb_.CreateBitCast(irb_.CreateGEP(shadow_frame_, gep_index),
                                                     irb_.getJObjectTy()->getPointerTo());
-  irb_.CreateStore(this_object_or_class_object, sirt_field_addr, kTBAAShadowFrame);
+  irb_.CreateStore(this_object_or_class_object, handle_scope_field_addr, kTBAAShadowFrame);
   // Push the "this object or class object" to out args
-  this_object_or_class_object = irb_.CreateBitCast(sirt_field_addr, irb_.getJObjectTy());
+  this_object_or_class_object = irb_.CreateBitCast(handle_scope_field_addr, irb_.getJObjectTy());
   args.push_back(this_object_or_class_object);
-  // Store arguments to SIRT, and push back to args
+  // Store arguments to handle scope, and push back to args
   for (arg_iter = arg_begin; arg_iter != arg_end; ++arg_iter) {
     if (arg_iter->getType() == irb_.getJObjectTy()) {
-      // Store the reference type arguments to SIRT
-      gep_index[2] = irb_.getInt32(sirt_member_index++);
-      ::llvm::Value* sirt_field_addr = irb_.CreateBitCast(irb_.CreateGEP(shadow_frame_, gep_index),
+      // Store the reference type arguments to handle scope
+      gep_index[2] = irb_.getInt32(handle_scope_member_index++);
+      ::llvm::Value* handle_scope_field_addr = irb_.CreateBitCast(irb_.CreateGEP(shadow_frame_, gep_index),
                                                         irb_.getJObjectTy()->getPointerTo());
-      irb_.CreateStore(arg_iter, sirt_field_addr, kTBAAShadowFrame);
-      // Note null is placed in the SIRT but the jobject passed to the native code must be null
-      // (not a pointer into the SIRT as with regular references).
+      irb_.CreateStore(arg_iter, handle_scope_field_addr, kTBAAShadowFrame);
+      // Note null is placed in the handle scope but the jobject passed to the native code must be null
+      // (not a pointer into the handle scope as with regular references).
       ::llvm::Value* equal_null = irb_.CreateICmpEQ(arg_iter, irb_.getJNull());
       ::llvm::Value* arg =
           irb_.CreateSelect(equal_null,
                             irb_.getJNull(),
-                            irb_.CreateBitCast(sirt_field_addr, irb_.getJObjectTy()));
+                            irb_.CreateBitCast(handle_scope_field_addr, irb_.getJObjectTy()));
       args.push_back(arg);
     } else {
       args.push_back(arg_iter);
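The comments above keep the hand-off rule this change carries over from the SIRT days: every reference argument is spilled into the handle scope, and the jobject handed to native code is either null or the address of that handle-scope slot, never the raw reference. A conceptual sketch, not the ART API:

    jobject ToJniArgument(void* reference, void** handle_scope_slot) {
      *handle_scope_slot = reference;          // always recorded so the GC can find it
      return reference == nullptr
                 ? nullptr                     // null stays a plain null jobject
                 : reinterpret_cast<jobject>(handle_scope_slot);
    }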
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index ae18d2e..649a80f 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -144,10 +144,10 @@
 size_t ArmJniCallingConvention::FrameSize() {
   // Method*, LR and callee save area size, local reference segment state
   size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize;
-  // References plus 2 words for SIRT header
-  size_t sirt_size = StackIndirectReferenceTable::GetAlignedSirtSizeTarget(kFramePointerSize, ReferenceCount());
+  // References plus 2 words for HandleScope header
+  size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount());
   // Plus return value spill area size
-  return RoundUp(frame_data_size + sirt_size + SizeOfReturnValue(), kStackAlignment);
+  return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
 
 size_t ArmJniCallingConvention::OutArgSize() {
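To make the FrameSize() computation above concrete, one worked example under assumed numbers (6 callee saves, 3 reference arguments including the implicit class or this, a 4-byte return value, 4-byte frame pointers, 16-byte stack alignment); the figures are illustrative only:

    //   frame_data_size   = (3 + 6) * 4                    = 36 bytes
    //   handle_scope_size = (2 header words + 3 refs) * 4  = 20 bytes
    //   return spill                                        =  4 bytes
    //   FrameSize()       = RoundUp(36 + 20 + 4, 16)       = 64 bytes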
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 6212a23..ffd27ee 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -197,10 +197,10 @@
 size_t Arm64JniCallingConvention::FrameSize() {
   // Method*, callee save area size, local reference segment state
   size_t frame_data_size = ((1 + CalleeSaveRegisters().size()) * kFramePointerSize) + sizeof(uint32_t);
-  // References plus 2 words for SIRT header
-  size_t sirt_size = StackIndirectReferenceTable::GetAlignedSirtSizeTarget(kFramePointerSize, ReferenceCount());
+  // References plus 2 words for HandleScope header
+  size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount());
   // Plus return value spill area size
-  return RoundUp(frame_data_size + sirt_size + SizeOfReturnValue(), kStackAlignment);
+  return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
 
 size_t Arm64JniCallingConvention::OutArgSize() {
diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc
index a99a4c2..95c2d40 100644
--- a/compiler/jni/quick/calling_convention.cc
+++ b/compiler/jni/quick/calling_convention.cc
@@ -126,8 +126,8 @@
 }
 
 FrameOffset JniCallingConvention::SavedLocalReferenceCookieOffset() const {
-  size_t references_size = sirt_pointer_size_ * ReferenceCount();  // size excluding header
-  return FrameOffset(SirtReferencesOffset().Int32Value() + references_size);
+  size_t references_size = handle_scope_pointer_size_ * ReferenceCount();  // size excluding header
+  return FrameOffset(HandleScopeReferencesOffset().Int32Value() + references_size);
 }
 
 FrameOffset JniCallingConvention::ReturnValueSaveLocation() const {
@@ -219,13 +219,13 @@
   }
 }
 
-// Return position of SIRT entry holding reference at the current iterator
+// Return position of handle scope entry holding reference at the current iterator
 // position
-FrameOffset JniCallingConvention::CurrentParamSirtEntryOffset() {
+FrameOffset JniCallingConvention::CurrentParamHandleScopeEntryOffset() {
   CHECK(IsCurrentParamAReference());
-  CHECK_LT(SirtLinkOffset(), SirtNumRefsOffset());
-  int result = SirtReferencesOffset().Int32Value() + itr_refs_ * sirt_pointer_size_;
-  CHECK_GT(result, SirtNumRefsOffset().Int32Value());
+  CHECK_LT(HandleScopeLinkOffset(), HandleScopeNumRefsOffset());
+  int result = HandleScopeReferencesOffset().Int32Value() + itr_refs_ * handle_scope_pointer_size_;
+  CHECK_GT(result, HandleScopeNumRefsOffset().Int32Value());
   return FrameOffset(result);
 }
 
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index 18afd58..2a6e7d9 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -18,7 +18,7 @@
 #define ART_COMPILER_JNI_QUICK_CALLING_CONVENTION_H_
 
 #include <vector>
-#include "stack_indirect_reference_table.h"
+#include "handle_scope.h"
 #include "thread.h"
 #include "utils/managed_register.h"
 
@@ -73,7 +73,7 @@
       : itr_slots_(0), itr_refs_(0), itr_args_(0), itr_longs_and_doubles_(0),
         itr_float_and_doubles_(0), displacement_(0),
         frame_pointer_size_(frame_pointer_size),
-        sirt_pointer_size_(sizeof(StackReference<mirror::Object>)),
+        handle_scope_pointer_size_(sizeof(StackReference<mirror::Object>)),
         is_static_(is_static), is_synchronized_(is_synchronized),
         shorty_(shorty) {
     num_args_ = (is_static ? 0 : 1) + strlen(shorty) - 1;
@@ -197,8 +197,8 @@
   FrameOffset displacement_;
   // The size of a reference.
   const size_t frame_pointer_size_;
-  // The size of a reference entry within the SIRT.
-  const size_t sirt_pointer_size_;
+  // The size of a reference entry within the handle scope.
+  const size_t handle_scope_pointer_size_;
 
  private:
   const bool is_static_;
@@ -315,26 +315,25 @@
   virtual FrameOffset CurrentParamStackOffset() = 0;
 
   // Iterator interface extension for JNI
-  FrameOffset CurrentParamSirtEntryOffset();
+  FrameOffset CurrentParamHandleScopeEntryOffset();
 
-  // Position of SIRT and interior fields
-  FrameOffset SirtOffset() const {
+  // Position of handle scope and interior fields
+  FrameOffset HandleScopeOffset() const {
     return FrameOffset(this->displacement_.Int32Value() + frame_pointer_size_);  // above Method*
   }
 
-  FrameOffset SirtLinkOffset() const {
-    return FrameOffset(SirtOffset().Int32Value() +
-                       StackIndirectReferenceTable::LinkOffset(frame_pointer_size_));
+  FrameOffset HandleScopeLinkOffset() const {
+    return FrameOffset(HandleScopeOffset().Int32Value() + HandleScope::LinkOffset(frame_pointer_size_));
   }
 
-  FrameOffset SirtNumRefsOffset() const {
-    return FrameOffset(SirtOffset().Int32Value() +
-                       StackIndirectReferenceTable::NumberOfReferencesOffset(frame_pointer_size_));
+  FrameOffset HandleScopeNumRefsOffset() const {
+    return FrameOffset(HandleScopeOffset().Int32Value() +
+                       HandleScope::NumberOfReferencesOffset(frame_pointer_size_));
   }
 
-  FrameOffset SirtReferencesOffset() const {
-    return FrameOffset(SirtOffset().Int32Value() +
-                       StackIndirectReferenceTable::ReferencesOffset(frame_pointer_size_));
+  FrameOffset HandleScopeReferencesOffset() const {
+    return FrameOffset(HandleScopeOffset().Int32Value() +
+                       HandleScope::ReferencesOffset(frame_pointer_size_));
   }
 
   virtual ~JniCallingConvention() {}
@@ -350,7 +349,7 @@
                                 size_t frame_pointer_size)
       : CallingConvention(is_static, is_synchronized, shorty, frame_pointer_size) {}
 
-  // Number of stack slots for outgoing arguments, above which the SIRT is
+  // Number of stack slots for outgoing arguments, above which the handle scope is
   // located
   virtual size_t NumberOfOutgoingStackArgs() = 0;
 
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 9f439eb..7664a7f 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -15,6 +15,7 @@
  */
 
 #include <algorithm>
+#include <memory>
 #include <vector>
 
 #include "base/logging.h"
@@ -33,7 +34,6 @@
 #include "utils/mips/managed_register_mips.h"
 #include "utils/x86/managed_register_x86.h"
 #include "thread.h"
-#include "UniquePtr.h"
 
 #define __ jni_asm->
 
@@ -52,7 +52,7 @@
 //   registers, a reference to the method object is supplied as part of this
 //   convention.
 //
-CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver& compiler,
+CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
                                             uint32_t access_flags, uint32_t method_idx,
                                             const DexFile& dex_file) {
   const bool is_native = (access_flags & kAccNative) != 0;
@@ -60,17 +60,17 @@
   const bool is_static = (access_flags & kAccStatic) != 0;
   const bool is_synchronized = (access_flags & kAccSynchronized) != 0;
   const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
-  InstructionSet instruction_set = compiler.GetInstructionSet();
+  InstructionSet instruction_set = driver->GetInstructionSet();
   if (instruction_set == kThumb2) {
     instruction_set = kArm;
   }
   const bool is_64_bit_target = Is64BitInstructionSet(instruction_set);
   // Calling conventions used to iterate over parameters to method
-  UniquePtr<JniCallingConvention> main_jni_conv(
+  std::unique_ptr<JniCallingConvention> main_jni_conv(
       JniCallingConvention::Create(is_static, is_synchronized, shorty, instruction_set));
   bool reference_return = main_jni_conv->IsReturnAReference();
 
-  UniquePtr<ManagedRuntimeCallingConvention> mr_conv(
+  std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv(
       ManagedRuntimeCallingConvention::Create(is_static, is_synchronized, shorty, instruction_set));
 
   // Calling conventions to call into JNI method "end" possibly passing a returned reference, the
@@ -86,11 +86,11 @@
     jni_end_shorty = "V";
   }
 
-  UniquePtr<JniCallingConvention> end_jni_conv(
+  std::unique_ptr<JniCallingConvention> end_jni_conv(
       JniCallingConvention::Create(is_static, is_synchronized, jni_end_shorty, instruction_set));
 
   // Assembler that holds generated instructions
-  UniquePtr<Assembler> jni_asm(Assembler::Create(instruction_set));
+  std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set));
 
   // Offsets into data structures
   // TODO: if cross compiling these offsets are for the host not the target
@@ -103,54 +103,54 @@
   const std::vector<ManagedRegister>& callee_save_regs = main_jni_conv->CalleeSaveRegisters();
   __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills());
 
-  // 2. Set up the StackIndirectReferenceTable
+  // 2. Set up the HandleScope
   mr_conv->ResetIterator(FrameOffset(frame_size));
   main_jni_conv->ResetIterator(FrameOffset(0));
-  __ StoreImmediateToFrame(main_jni_conv->SirtNumRefsOffset(),
+  __ StoreImmediateToFrame(main_jni_conv->HandleScopeNumRefsOffset(),
                            main_jni_conv->ReferenceCount(),
                            mr_conv->InterproceduralScratchRegister());
 
   if (is_64_bit_target) {
-    __ CopyRawPtrFromThread64(main_jni_conv->SirtLinkOffset(),
-                            Thread::TopSirtOffset<8>(),
+    __ CopyRawPtrFromThread64(main_jni_conv->HandleScopeLinkOffset(),
+                            Thread::TopHandleScopeOffset<8>(),
                             mr_conv->InterproceduralScratchRegister());
-    __ StoreStackOffsetToThread64(Thread::TopSirtOffset<8>(),
-                                main_jni_conv->SirtOffset(),
+    __ StoreStackOffsetToThread64(Thread::TopHandleScopeOffset<8>(),
+                                main_jni_conv->HandleScopeOffset(),
                                 mr_conv->InterproceduralScratchRegister());
   } else {
-    __ CopyRawPtrFromThread32(main_jni_conv->SirtLinkOffset(),
-                            Thread::TopSirtOffset<4>(),
+    __ CopyRawPtrFromThread32(main_jni_conv->HandleScopeLinkOffset(),
+                            Thread::TopHandleScopeOffset<4>(),
                             mr_conv->InterproceduralScratchRegister());
-    __ StoreStackOffsetToThread32(Thread::TopSirtOffset<4>(),
-                                main_jni_conv->SirtOffset(),
+    __ StoreStackOffsetToThread32(Thread::TopHandleScopeOffset<4>(),
+                                main_jni_conv->HandleScopeOffset(),
                                 mr_conv->InterproceduralScratchRegister());
   }
 
-  // 3. Place incoming reference arguments into SIRT
+  // 3. Place incoming reference arguments into handle scope
   main_jni_conv->Next();  // Skip JNIEnv*
   // 3.5. Create Class argument for static methods out of passed method
   if (is_static) {
-    FrameOffset sirt_offset = main_jni_conv->CurrentParamSirtEntryOffset();
-    // Check sirt offset is within frame
-    CHECK_LT(sirt_offset.Uint32Value(), frame_size);
+    FrameOffset handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
+    // Check handle scope offset is within frame
+    CHECK_LT(handle_scope_offset.Uint32Value(), frame_size);
     __ LoadRef(main_jni_conv->InterproceduralScratchRegister(),
                mr_conv->MethodRegister(), mirror::ArtMethod::DeclaringClassOffset());
     __ VerifyObject(main_jni_conv->InterproceduralScratchRegister(), false);
-    __ StoreRef(sirt_offset, main_jni_conv->InterproceduralScratchRegister());
-    main_jni_conv->Next();  // in SIRT so move to next argument
+    __ StoreRef(handle_scope_offset, main_jni_conv->InterproceduralScratchRegister());
+    main_jni_conv->Next();  // in handle scope so move to next argument
   }
   while (mr_conv->HasNext()) {
     CHECK(main_jni_conv->HasNext());
     bool ref_param = main_jni_conv->IsCurrentParamAReference();
     CHECK(!ref_param || mr_conv->IsCurrentParamAReference());
-    // References need placing in SIRT and the entry value passing
+    // References need placing in handle scope and the entry value passing
     if (ref_param) {
-      // Compute SIRT entry, note null is placed in the SIRT but its boxed value
+      // Compute handle scope entry, note null is placed in the handle scope but its boxed value
       // must be NULL
-      FrameOffset sirt_offset = main_jni_conv->CurrentParamSirtEntryOffset();
-      // Check SIRT offset is within frame and doesn't run into the saved segment state
-      CHECK_LT(sirt_offset.Uint32Value(), frame_size);
-      CHECK_NE(sirt_offset.Uint32Value(),
+      FrameOffset handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
+      // Check handle scope offset is within frame and doesn't run into the saved segment state
+      CHECK_LT(handle_scope_offset.Uint32Value(), frame_size);
+      CHECK_NE(handle_scope_offset.Uint32Value(),
                main_jni_conv->SavedLocalReferenceCookieOffset().Uint32Value());
       bool input_in_reg = mr_conv->IsCurrentParamInRegister();
       bool input_on_stack = mr_conv->IsCurrentParamOnStack();
@@ -159,11 +159,11 @@
       if (input_in_reg) {
         ManagedRegister in_reg  =  mr_conv->CurrentParamRegister();
         __ VerifyObject(in_reg, mr_conv->IsCurrentArgPossiblyNull());
-        __ StoreRef(sirt_offset, in_reg);
+        __ StoreRef(handle_scope_offset, in_reg);
       } else if (input_on_stack) {
         FrameOffset in_off  = mr_conv->CurrentParamStackOffset();
         __ VerifyObject(in_off, mr_conv->IsCurrentArgPossiblyNull());
-        __ CopyRef(sirt_offset, in_off,
+        __ CopyRef(handle_scope_offset, in_off,
                    mr_conv->InterproceduralScratchRegister());
       }
     }
@@ -197,20 +197,20 @@
   ThreadOffset<8> jni_start64 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(8, pJniMethodStartSynchronized)
                                                 : QUICK_ENTRYPOINT_OFFSET(8, pJniMethodStart);
   main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
-  FrameOffset locked_object_sirt_offset(0);
+  FrameOffset locked_object_handle_scope_offset(0);
   if (is_synchronized) {
     // Pass object for locking.
     main_jni_conv->Next();  // Skip JNIEnv.
-    locked_object_sirt_offset = main_jni_conv->CurrentParamSirtEntryOffset();
+    locked_object_handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
     if (main_jni_conv->IsCurrentParamOnStack()) {
       FrameOffset out_off = main_jni_conv->CurrentParamStackOffset();
-      __ CreateSirtEntry(out_off, locked_object_sirt_offset,
+      __ CreateHandleScopeEntry(out_off, locked_object_handle_scope_offset,
                          mr_conv->InterproceduralScratchRegister(),
                          false);
     } else {
       ManagedRegister out_reg = main_jni_conv->CurrentParamRegister();
-      __ CreateSirtEntry(out_reg, locked_object_sirt_offset,
+      __ CreateHandleScopeEntry(out_reg, locked_object_handle_scope_offset,
                          ManagedRegister::NoRegister(), false);
     }
     main_jni_conv->Next();
@@ -274,15 +274,15 @@
     mr_conv->ResetIterator(FrameOffset(frame_size+main_out_arg_size));
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
     main_jni_conv->Next();  // Skip JNIEnv*
-    FrameOffset sirt_offset = main_jni_conv->CurrentParamSirtEntryOffset();
+    FrameOffset handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
     if (main_jni_conv->IsCurrentParamOnStack()) {
       FrameOffset out_off = main_jni_conv->CurrentParamStackOffset();
-      __ CreateSirtEntry(out_off, sirt_offset,
+      __ CreateHandleScopeEntry(out_off, handle_scope_offset,
                          mr_conv->InterproceduralScratchRegister(),
                          false);
     } else {
       ManagedRegister out_reg = main_jni_conv->CurrentParamRegister();
-      __ CreateSirtEntry(out_reg, sirt_offset,
+      __ CreateHandleScopeEntry(out_reg, handle_scope_offset,
                          ManagedRegister::NoRegister(), false);
     }
   }
@@ -369,12 +369,12 @@
     // Pass object for unlocking.
     if (end_jni_conv->IsCurrentParamOnStack()) {
       FrameOffset out_off = end_jni_conv->CurrentParamStackOffset();
-      __ CreateSirtEntry(out_off, locked_object_sirt_offset,
+      __ CreateHandleScopeEntry(out_off, locked_object_handle_scope_offset,
                          end_jni_conv->InterproceduralScratchRegister(),
                          false);
     } else {
       ManagedRegister out_reg = end_jni_conv->CurrentParamRegister();
-      __ CreateSirtEntry(out_reg, locked_object_sirt_offset,
+      __ CreateHandleScopeEntry(out_reg, locked_object_handle_scope_offset,
                          ManagedRegister::NoRegister(), false);
     }
     end_jni_conv->Next();
@@ -423,7 +423,7 @@
   std::vector<uint8_t> managed_code(cs);
   MemoryRegion code(&managed_code[0], managed_code.size());
   __ FinalizeInstructions(code);
-  return new CompiledMethod(compiler,
+  return new CompiledMethod(driver,
                             instruction_set,
                             managed_code,
                             frame_size,
@@ -438,7 +438,7 @@
                           size_t frame_size, size_t out_arg_size) {
   bool input_in_reg = mr_conv->IsCurrentParamInRegister();
   bool output_in_reg = jni_conv->IsCurrentParamInRegister();
-  FrameOffset sirt_offset(0);
+  FrameOffset handle_scope_offset(0);
   bool null_allowed = false;
   bool ref_param = jni_conv->IsCurrentParamAReference();
   CHECK(!ref_param || mr_conv->IsCurrentParamAReference());
@@ -449,21 +449,21 @@
   } else {
     CHECK(jni_conv->IsCurrentParamOnStack());
   }
-  // References need placing in SIRT and the entry address passing
+  // References need placing in handle scope and the entry address passing
   if (ref_param) {
     null_allowed = mr_conv->IsCurrentArgPossiblyNull();
-    // Compute SIRT offset. Note null is placed in the SIRT but the jobject
-    // passed to the native code must be null (not a pointer into the SIRT
+    // Compute handle scope offset. Note null is placed in the handle scope but the jobject
+    // passed to the native code must be null (not a pointer into the handle scope
     // as with regular references).
-    sirt_offset = jni_conv->CurrentParamSirtEntryOffset();
-    // Check SIRT offset is within frame.
-    CHECK_LT(sirt_offset.Uint32Value(), (frame_size + out_arg_size));
+    handle_scope_offset = jni_conv->CurrentParamHandleScopeEntryOffset();
+    // Check handle scope offset is within frame.
+    CHECK_LT(handle_scope_offset.Uint32Value(), (frame_size + out_arg_size));
   }
   if (input_in_reg && output_in_reg) {
     ManagedRegister in_reg = mr_conv->CurrentParamRegister();
     ManagedRegister out_reg = jni_conv->CurrentParamRegister();
     if (ref_param) {
-      __ CreateSirtEntry(out_reg, sirt_offset, in_reg, null_allowed);
+      __ CreateHandleScopeEntry(out_reg, handle_scope_offset, in_reg, null_allowed);
     } else {
       if (!mr_conv->IsCurrentParamOnStack()) {
         // regular non-straddling move
@@ -475,7 +475,7 @@
   } else if (!input_in_reg && !output_in_reg) {
     FrameOffset out_off = jni_conv->CurrentParamStackOffset();
     if (ref_param) {
-      __ CreateSirtEntry(out_off, sirt_offset, mr_conv->InterproceduralScratchRegister(),
+      __ CreateHandleScopeEntry(out_off, handle_scope_offset, mr_conv->InterproceduralScratchRegister(),
                          null_allowed);
     } else {
       FrameOffset in_off = mr_conv->CurrentParamStackOffset();
@@ -489,7 +489,7 @@
     // Check that incoming stack arguments are above the current stack frame.
     CHECK_GT(in_off.Uint32Value(), frame_size);
     if (ref_param) {
-      __ CreateSirtEntry(out_reg, sirt_offset, ManagedRegister::NoRegister(), null_allowed);
+      __ CreateHandleScopeEntry(out_reg, handle_scope_offset, ManagedRegister::NoRegister(), null_allowed);
     } else {
       size_t param_size = mr_conv->CurrentParamSize();
       CHECK_EQ(param_size, jni_conv->CurrentParamSize());
@@ -502,8 +502,8 @@
     // Check outgoing argument is within frame
     CHECK_LT(out_off.Uint32Value(), frame_size);
     if (ref_param) {
-      // TODO: recycle value in in_reg rather than reload from SIRT
-      __ CreateSirtEntry(out_off, sirt_offset, mr_conv->InterproceduralScratchRegister(),
+      // TODO: recycle value in in_reg rather than reload from handle scope
+      __ CreateHandleScopeEntry(out_off, handle_scope_offset, mr_conv->InterproceduralScratchRegister(),
                          null_allowed);
     } else {
       size_t param_size = mr_conv->CurrentParamSize();
@@ -536,7 +536,7 @@
 
 }  // namespace art
 
-extern "C" art::CompiledMethod* ArtQuickJniCompileMethod(art::CompilerDriver& compiler,
+extern "C" art::CompiledMethod* ArtQuickJniCompileMethod(art::CompilerDriver* compiler,
                                                          uint32_t access_flags, uint32_t method_idx,
                                                          const art::DexFile& dex_file) {
   return ArtJniCompileMethodInternal(compiler, access_flags, method_idx, dex_file);
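
The hunks above spill each reference argument into a handle scope slot in the frame and hand native code either null or the address of that slot, which is what CreateHandleScopeEntry with its null_allowed flag expresses. A minimal standalone sketch of that contract, using hypothetical names (HandleSlot, ToJniArgument) rather than the ART assembler interface:

#include <cassert>
#include <cstdint>

// Hypothetical stand-ins for the managed object and the JNI-visible handle.
struct Object;
using jobject_like = Object**;  // native code sees a pointer to the slot, not the object

// One slot of a frame-resident handle scope.
struct HandleSlot {
  Object* ref = nullptr;
};

// Store the incoming reference into the slot, then produce the value passed
// to native code: the slot's address, or null when the reference itself is
// null and nulls are allowed (mirrors the null_allowed handling above).
inline jobject_like ToJniArgument(HandleSlot* slot, Object* incoming, bool null_allowed) {
  slot->ref = incoming;            // null is still written into the slot
  if (null_allowed && incoming == nullptr) {
    return nullptr;                // native code must observe a null jobject
  }
  return &slot->ref;               // otherwise pass the slot's address
}

int main() {
  HandleSlot slot;
  Object* some_object = reinterpret_cast<Object*>(0x1000);  // placeholder value, never dereferenced
  assert(ToJniArgument(&slot, some_object, /*null_allowed=*/true) == &slot.ref);
  assert(ToJniArgument(&slot, nullptr, /*null_allowed=*/true) == nullptr);
  return 0;
}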
diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc
index 8e1c0c7..0402fe6 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.cc
+++ b/compiler/jni/quick/mips/calling_convention_mips.cc
@@ -148,10 +148,10 @@
 size_t MipsJniCallingConvention::FrameSize() {
   // Method*, LR and callee save area size, local reference segment state
   size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize;
-  // References plus 2 words for SIRT header
-  size_t sirt_size = StackIndirectReferenceTable::GetAlignedSirtSizeTarget(kFramePointerSize, ReferenceCount());
+  // References plus 2 words for HandleScope header
+  size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount());
   // Plus return value spill area size
-  return RoundUp(frame_data_size + sirt_size + SizeOfReturnValue(), kStackAlignment);
+  return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
 
 size_t MipsJniCallingConvention::OutArgSize() {
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 153f953..97b4cdf 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -125,10 +125,10 @@
 size_t X86JniCallingConvention::FrameSize() {
   // Method*, return address and callee save area size, local reference segment state
   size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize;
-  // References plus 2 words for SIRT header
-  size_t sirt_size = StackIndirectReferenceTable::GetAlignedSirtSizeTarget(kFramePointerSize, ReferenceCount());
+  // References plus 2 words for HandleScope header
+  size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount());
   // Plus return value spill area size
-  return RoundUp(frame_data_size + sirt_size + SizeOfReturnValue(), kStackAlignment);
+  return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
 
 size_t X86JniCallingConvention::OutArgSize() {
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index 4dfa29a..4871c87 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -133,17 +133,17 @@
 }
 
 uint32_t X86_64JniCallingConvention::CoreSpillMask() const {
-  return 1 << RBX | 1 << RBP | 1 << R12 | 1 << R13 | 1 << R14 | 1 << R15 | 1 << R13 |
+  return 1 << RBX | 1 << RBP | 1 << R12 | 1 << R13 | 1 << R14 | 1 << R15 |
       1 << kNumberOfCpuRegisters;
 }
 
 size_t X86_64JniCallingConvention::FrameSize() {
   // Method*, return address and callee save area size, local reference segment state
   size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize;
-  // References plus link_ (pointer) and number_of_references_ (uint32_t) for SIRT header
-  size_t sirt_size = StackIndirectReferenceTable::GetAlignedSirtSizeTarget(kFramePointerSize, ReferenceCount());
+  // References plus link_ (pointer) and number_of_references_ (uint32_t) for HandleScope header
+  size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount());
   // Plus return value spill area size
-  return RoundUp(frame_data_size + sirt_size + SizeOfReturnValue(), kStackAlignment);
+  return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
 
 size_t X86_64JniCallingConvention::OutArgSize() {
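
The FrameSize() changes in the MIPS, x86 and x86-64 calling conventions above share one shape: fixed frame data, plus a handle scope sized as a header (a link pointer and a uint32_t reference count, per the x86-64 comment) followed by one pointer-sized slot per reference, plus the return value spill area, all rounded up to the stack alignment. A rough standalone sketch of that arithmetic; the helper names are made up and the numbers in main are purely illustrative:

#include <cstddef>
#include <cstdint>
#include <iostream>

// Round x up to the next multiple of alignment (alignment must be a power of two).
size_t RoundUpPow2(size_t x, size_t alignment) {
  return (x + alignment - 1) & ~(alignment - 1);
}

// Handle scope header: a link pointer plus a uint32_t reference count, padded
// so that the reference slots following it stay pointer aligned.
size_t AlignedHandleScopeSize(size_t pointer_size, size_t reference_count) {
  size_t header = RoundUpPow2(pointer_size + sizeof(uint32_t), pointer_size);
  return header + reference_count * pointer_size;
}

size_t FrameSize(size_t pointer_size, size_t callee_saves, size_t reference_count,
                 size_t return_value_size, size_t stack_alignment) {
  // Method*, return address, local reference segment state, plus callee saves.
  size_t frame_data = (3 + callee_saves) * pointer_size;
  return RoundUpPow2(
      frame_data + AlignedHandleScopeSize(pointer_size, reference_count) + return_value_size,
      stack_alignment);
}

int main() {
  // Purely illustrative numbers: 64-bit pointers, 6 callee saves, 3 reference
  // arguments, an 8-byte return value, 16-byte stack alignment.
  std::cout << FrameSize(8, 6, 3, 8, 16) << "\n";  // prints 128
  return 0;
}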
diff --git a/compiler/llvm/compiler_llvm.cc b/compiler/llvm/compiler_llvm.cc
index 2812700..5990e8c 100644
--- a/compiler/llvm/compiler_llvm.cc
+++ b/compiler/llvm/compiler_llvm.cc
@@ -136,7 +136,7 @@
 
 CompiledMethod* CompilerLLVM::
 CompileDexMethod(DexCompilationUnit* dex_compilation_unit, InvokeType invoke_type) {
-  UniquePtr<LlvmCompilationUnit> cunit(AllocateCompilationUnit());
+  std::unique_ptr<LlvmCompilationUnit> cunit(AllocateCompilationUnit());
 
   cunit->SetDexCompilationUnit(dex_compilation_unit);
   cunit->SetCompilerDriver(compiler_driver_);
@@ -163,9 +163,9 @@
 
 CompiledMethod* CompilerLLVM::
 CompileNativeMethod(DexCompilationUnit* dex_compilation_unit) {
-  UniquePtr<LlvmCompilationUnit> cunit(AllocateCompilationUnit());
+  std::unique_ptr<LlvmCompilationUnit> cunit(AllocateCompilationUnit());
 
-  UniquePtr<JniCompiler> jni_compiler(
+  std::unique_ptr<JniCompiler> jni_compiler(
       new JniCompiler(cunit.get(), compiler_driver_, dex_compilation_unit));
 
   return jni_compiler->Compile();
@@ -175,8 +175,8 @@
 }  // namespace llvm
 }  // namespace art
 
-static art::llvm::CompilerLLVM* ContextOf(art::CompilerDriver& driver) {
-  void *compiler_context = driver.GetCompilerContext();
+static art::llvm::CompilerLLVM* ContextOf(art::CompilerDriver* driver) {
+  void *compiler_context = driver->GetCompilerContext();
   CHECK(compiler_context != NULL);
   return reinterpret_cast<art::llvm::CompilerLLVM*>(compiler_context);
 }
@@ -187,20 +187,20 @@
   return reinterpret_cast<art::llvm::CompilerLLVM*>(compiler_context);
 }
 
-extern "C" void ArtInitCompilerContext(art::CompilerDriver& driver) {
-  CHECK(driver.GetCompilerContext() == NULL);
+extern "C" void ArtInitCompilerContext(art::CompilerDriver* driver) {
+  CHECK(driver->GetCompilerContext() == nullptr);
 
-  art::llvm::CompilerLLVM* compiler_llvm = new art::llvm::CompilerLLVM(&driver,
-                                                                       driver.GetInstructionSet());
+  art::llvm::CompilerLLVM* compiler_llvm = new art::llvm::CompilerLLVM(driver,
+                                                                       driver->GetInstructionSet());
 
-  driver.SetCompilerContext(compiler_llvm);
+  driver->SetCompilerContext(compiler_llvm);
 }
 
-extern "C" void ArtUnInitCompilerContext(art::CompilerDriver& driver) {
+extern "C" void ArtUnInitCompilerContext(art::CompilerDriver* driver) {
   delete ContextOf(driver);
-  driver.SetCompilerContext(NULL);
+  driver->SetCompilerContext(nullptr);
 }
-extern "C" art::CompiledMethod* ArtCompileMethod(art::CompilerDriver& driver,
+extern "C" art::CompiledMethod* ArtCompileMethod(art::CompilerDriver* driver,
                                                  const art::DexFile::CodeItem* code_item,
                                                  uint32_t access_flags,
                                                  art::InvokeType invoke_type,
@@ -213,13 +213,13 @@
 
   art::DexCompilationUnit dex_compilation_unit(
     NULL, class_loader, class_linker, dex_file, code_item,
-    class_def_idx, method_idx, access_flags, driver.GetVerifiedMethod(&dex_file, method_idx));
+    class_def_idx, method_idx, access_flags, driver->GetVerifiedMethod(&dex_file, method_idx));
   art::llvm::CompilerLLVM* compiler_llvm = ContextOf(driver);
   art::CompiledMethod* result = compiler_llvm->CompileDexMethod(&dex_compilation_unit, invoke_type);
   return result;
 }
 
-extern "C" art::CompiledMethod* ArtLLVMJniCompileMethod(art::CompilerDriver& driver,
+extern "C" art::CompiledMethod* ArtLLVMJniCompileMethod(art::CompilerDriver* driver,
                                                         uint32_t access_flags, uint32_t method_idx,
                                                         const art::DexFile& dex_file) {
   art::ClassLinker *class_linker = art::Runtime::Current()->GetClassLinker();
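
The compiler_llvm.cc hunks above change the extern "C" entry points from CompilerDriver& to CompilerDriver* and keep stashing the backend behind GetCompilerContext()/SetCompilerContext(). The overall pattern, an opaque context hung off a driver object and reached through C-linkage functions taking a plain pointer, can be sketched as below; Driver, Backend and the function names are illustrative stand-ins, not the ART API:

#include <cassert>

// Illustrative driver that owns an opaque backend context.
class Driver {
 public:
  void* GetCompilerContext() const { return context_; }
  void SetCompilerContext(void* context) { context_ = context; }
 private:
  void* context_ = nullptr;
};

// Illustrative backend stored behind the opaque pointer.
class Backend {
 public:
  int CompileSomething() { return 42; }
};

static Backend* ContextOf(Driver* driver) {
  void* context = driver->GetCompilerContext();
  assert(context != nullptr);
  return static_cast<Backend*>(context);
}

// Pointer parameters keep these C-linkage signatures expressible in plain C
// and make the null and ownership expectations explicit at each call site.
extern "C" void InitContext(Driver* driver) {
  assert(driver->GetCompilerContext() == nullptr);
  driver->SetCompilerContext(new Backend());
}

extern "C" void UnInitContext(Driver* driver) {
  delete ContextOf(driver);
  driver->SetCompilerContext(nullptr);
}

extern "C" int CompileOne(Driver* driver) {
  return ContextOf(driver)->CompileSomething();
}

int main() {
  Driver driver;
  InitContext(&driver);
  int result = CompileOne(&driver);
  UnInitContext(&driver);
  return result == 42 ? 0 : 1;
}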
diff --git a/compiler/llvm/compiler_llvm.h b/compiler/llvm/compiler_llvm.h
index c2211fb..cc74deb 100644
--- a/compiler/llvm/compiler_llvm.h
+++ b/compiler/llvm/compiler_llvm.h
@@ -17,18 +17,17 @@
 #ifndef ART_COMPILER_LLVM_COMPILER_LLVM_H_
 #define ART_COMPILER_LLVM_COMPILER_LLVM_H_
 
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
 #include "base/macros.h"
 #include "dex_file.h"
 #include "driver/compiler_driver.h"
 #include "instruction_set.h"
 #include "mirror/object.h"
 
-#include <UniquePtr.h>
-
-#include <string>
-#include <utility>
-#include <vector>
-
 namespace art {
   class CompiledMethod;
   class CompilerDriver;
diff --git a/compiler/llvm/gbc_expander.cc b/compiler/llvm/gbc_expander.cc
index cf28db3..25c9b20 100644
--- a/compiler/llvm/gbc_expander.cc
+++ b/compiler/llvm/gbc_expander.cc
@@ -141,7 +141,7 @@
 
   std::vector<llvm::BasicBlock*> basic_block_landing_pads_;
   llvm::BasicBlock* current_bb_;
-  std::map<llvm::BasicBlock*, std::vector<std::pair<llvm::BasicBlock*, llvm::BasicBlock*> > >
+  std::map<llvm::BasicBlock*, std::vector<std::pair<llvm::BasicBlock*, llvm::BasicBlock*>>>
       landing_pad_phi_mapping_;
   llvm::BasicBlock* basic_block_unwind_;
 
@@ -545,7 +545,7 @@
     }
 
     llvm::TerminatorInst* term_inst = lbb->getTerminator();
-    std::vector<std::pair<llvm::BasicBlock*, llvm::BasicBlock*> >& rewrite_pair
+    std::vector<std::pair<llvm::BasicBlock*, llvm::BasicBlock*>>& rewrite_pair
         = landing_pad_phi_mapping_[lbb];
     irb_.SetInsertPoint(lbb->begin());
 
diff --git a/compiler/llvm/llvm_compilation_unit.cc b/compiler/llvm/llvm_compilation_unit.cc
index 78bdb4d..741c2d7 100644
--- a/compiler/llvm/llvm_compilation_unit.cc
+++ b/compiler/llvm/llvm_compilation_unit.cc
@@ -152,7 +152,7 @@
   std::string bitcode;
   DumpBitcodeToString(bitcode);
   std::string filename(StringPrintf("%s/Art%zu.bc", DumpDirectory().c_str(), cunit_id_));
-  UniquePtr<File> output(OS::CreateEmptyFile(filename.c_str()));
+  std::unique_ptr<File> output(OS::CreateEmptyFile(filename.c_str()));
   output->WriteFully(bitcode.data(), bitcode.size());
   LOG(INFO) << ".bc file written successfully: " << filename;
 }
@@ -179,7 +179,7 @@
   if (kDumpELF) {
     // Dump the ELF image for debugging
     std::string filename(StringPrintf("%s/Art%zu.o", DumpDirectory().c_str(), cunit_id_));
-    UniquePtr<File> output(OS::CreateEmptyFile(filename.c_str()));
+    std::unique_ptr<File> output(OS::CreateEmptyFile(filename.c_str()));
     output->WriteFully(elf_object_.data(), elf_object_.size());
     LOG(INFO) << ".o file written successfully: " << filename;
   }
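
The UniquePtr to std::unique_ptr conversions in this change are mechanical, and the dump paths above lean on the smart pointer's destructor to release the file when the scope ends. A small self-contained sketch of that idiom using a plain std::ofstream instead of ART's File/OS wrappers, which are not reproduced here:

#include <fstream>
#include <memory>
#include <string>
#include <vector>

// Write a buffer to filename; the unique_ptr-owned stream is destroyed, and
// therefore closed, when it goes out of scope, even on an early return.
bool DumpToFile(const std::string& filename, const std::vector<uint8_t>& bytes) {
  std::unique_ptr<std::ofstream> output(
      new std::ofstream(filename, std::ios::binary | std::ios::trunc));
  if (!output->is_open()) {
    return false;
  }
  output->write(reinterpret_cast<const char*>(bytes.data()),
                static_cast<std::streamsize>(bytes.size()));
  return output->good();
}

int main() {
  std::vector<uint8_t> bitcode = {0x42, 0x43};  // placeholder payload
  return DumpToFile("/tmp/example.bc", bitcode) ? 0 : 1;
}

The heap allocation here only mirrors the UniquePtr<File> shape above; a stack-allocated stream would do the same job.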
diff --git a/compiler/llvm/llvm_compilation_unit.h b/compiler/llvm/llvm_compilation_unit.h
index 58aa6fd..f11fb6e 100644
--- a/compiler/llvm/llvm_compilation_unit.h
+++ b/compiler/llvm/llvm_compilation_unit.h
@@ -17,6 +17,10 @@
 #ifndef ART_COMPILER_LLVM_LLVM_COMPILATION_UNIT_H_
 #define ART_COMPILER_LLVM_LLVM_COMPILATION_UNIT_H_
 
+#include <memory>
+#include <string>
+#include <vector>
+
 #include "base/logging.h"
 #include "base/mutex.h"
 #include "dex/compiler_internals.h"
@@ -28,10 +32,6 @@
 #include "runtime_support_llvm_func.h"
 #include "safe_map.h"
 
-#include <UniquePtr.h>
-#include <string>
-#include <vector>
-
 namespace art {
   class CompiledMethod;
 }
@@ -106,12 +106,12 @@
   const CompilerLLVM* compiler_llvm_;
   const size_t cunit_id_;
 
-  UniquePtr< ::llvm::LLVMContext> context_;
-  UniquePtr<IRBuilder> irb_;
-  UniquePtr<RuntimeSupportBuilder> runtime_support_;
+  std::unique_ptr< ::llvm::LLVMContext> context_;
+  std::unique_ptr<IRBuilder> irb_;
+  std::unique_ptr<RuntimeSupportBuilder> runtime_support_;
   ::llvm::Module* module_;  // Managed by context_
-  UniquePtr<IntrinsicHelper> intrinsic_helper_;
-  UniquePtr<LLVMInfo> llvm_info_;
+  std::unique_ptr<IntrinsicHelper> intrinsic_helper_;
+  std::unique_ptr<LLVMInfo> llvm_info_;
   CompilerDriver* driver_;
   DexCompilationUnit* dex_compilation_unit_;
 
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index b5d3923..6812f3c 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -17,11 +17,12 @@
 #include "common_compiler_test.h"
 #include "compiler/compiler.h"
 #include "compiler/oat_writer.h"
+#include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
-#include "oat_file.h"
+#include "oat_file-inl.h"
 #include "vector_output_stream.h"
 
 namespace art {
@@ -127,7 +128,7 @@
     compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings);
   }
   std::string error_msg;
-  UniquePtr<OatFile> oat_file(OatFile::Open(tmp.GetFilename(), tmp.GetFilename(), NULL, false,
+  std::unique_ptr<OatFile> oat_file(OatFile::Open(tmp.GetFilename(), tmp.GetFilename(), NULL, false,
                                             &error_msg));
   ASSERT_TRUE(oat_file.get() != nullptr) << error_msg;
   const OatHeader& oat_header = oat_file->GetOatHeader();
@@ -152,8 +153,9 @@
       num_virtual_methods = it.NumVirtualMethods();
     }
     const char* descriptor = dex_file->GetClassDescriptor(class_def);
-    SirtRef<mirror::ClassLoader> loader(soa.Self(), nullptr);
-    mirror::Class* klass = class_linker->FindClass(soa.Self(), descriptor, loader);
+    StackHandleScope<1> hs(soa.Self());
+    mirror::Class* klass = class_linker->FindClass(soa.Self(), descriptor,
+                                                   NullHandle<mirror::ClassLoader>());
 
     const OatFile::OatClass oat_class = oat_dex_file->GetOatClass(i);
     CHECK_EQ(mirror::Class::Status::kStatusNotReady, oat_class.GetStatus()) << descriptor;
@@ -176,8 +178,9 @@
   // If this test is failing and you have to update these constants,
   // it is time to update OatHeader::kOatVersion
   EXPECT_EQ(80U, sizeof(OatHeader));
-  EXPECT_EQ(20U, sizeof(OatMethodOffsets));
-  EXPECT_EQ(12U, sizeof(OatMethodHeader));
+  EXPECT_EQ(8U, sizeof(OatMethodOffsets));
+  EXPECT_EQ(24U, sizeof(OatQuickMethodHeader));
+  EXPECT_EQ(80 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
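
oat_test.cc above swaps SirtRef<mirror::ClassLoader> for a StackHandleScope plus NullHandle: a fixed-capacity, stack-resident block of reference slots instead of a one-off wrapper object. A toy rendering of the shape of that API, with none of ART's thread, template or GC plumbing (Handle, NullHandle and StackHandleScope here are simplified stand-ins):

#include <array>
#include <cassert>
#include <cstddef>

struct Object {};  // placeholder for a managed object type

// A handle refers to a slot that holds the reference; a moving collector can
// update the slot in place and every handle pointing at it stays valid.
class Handle {
 public:
  explicit Handle(Object** slot) : slot_(slot) {}
  Object* Get() const { return slot_ == nullptr ? nullptr : *slot_; }
 private:
  Object** slot_;
};

// Handle that is always null, for call sites that have no object to pass.
class NullHandle : public Handle {
 public:
  NullHandle() : Handle(nullptr) {}
};

// Fixed-capacity scope whose slots live on the caller's stack frame.
template <size_t kCapacity>
class StackHandleScope {
 public:
  Handle NewHandle(Object* object) {
    assert(next_ < kCapacity);
    slots_[next_] = object;
    return Handle(&slots_[next_++]);
  }
 private:
  std::array<Object*, kCapacity> slots_{};
  size_t next_ = 0;
};

int main() {
  Object obj;
  StackHandleScope<1> hs;
  Handle handle = hs.NewHandle(&obj);
  assert(handle.Get() == &obj);
  assert(NullHandle().Get() == nullptr);
  return 0;
}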
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index bbc9c3e..5d532ab 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -33,7 +33,7 @@
 #include "output_stream.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
-#include "sirt_ref-inl.h"
+#include "handle_scope-inl.h"
 #include "verifier/method_verifier.h"
 
 namespace art {
@@ -331,9 +331,6 @@
     if (compiled_method != nullptr) {
       // Derived from CompiledMethod.
       uint32_t quick_code_offset = 0;
-      uint32_t frame_size_in_bytes = kStackAlignment;
-      uint32_t core_spill_mask = 0;
-      uint32_t fp_spill_mask = 0;
 
       const std::vector<uint8_t>* portable_code = compiled_method->GetPortableCode();
       const std::vector<uint8_t>* quick_code = compiled_method->GetQuickCode();
@@ -351,7 +348,7 @@
         uint32_t code_size = quick_code->size() * sizeof(uint8_t);
         CHECK_NE(code_size, 0U);
         uint32_t thumb_offset = compiled_method->CodeDelta();
-        quick_code_offset = offset_ + sizeof(OatMethodHeader) + thumb_offset;
+        quick_code_offset = offset_ + sizeof(OatQuickMethodHeader) + thumb_offset;
 
         std::vector<uint8_t>* cfi_info = writer_->compiler_driver_->GetCallFrameInformation();
         if (cfi_info != nullptr) {
@@ -374,27 +371,45 @@
           }
         }
 
-        DCHECK_LT(method_offsets_index_, oat_class->method_headers_.size());
-        OatMethodHeader* method_header = &oat_class->method_headers_[method_offsets_index_];
-        method_header->code_size_ = code_size;
-
         // Deduplicate code arrays.
         auto code_iter = dedupe_map_.find(compiled_method);
         if (code_iter != dedupe_map_.end()) {
           quick_code_offset = code_iter->second;
-          FixupMethodHeader(method_header, quick_code_offset - thumb_offset);
         } else {
           dedupe_map_.Put(compiled_method, quick_code_offset);
-          FixupMethodHeader(method_header, quick_code_offset - thumb_offset);
+        }
+
+        // Update quick method header.
+        DCHECK_LT(method_offsets_index_, oat_class->method_headers_.size());
+        OatQuickMethodHeader* method_header = &oat_class->method_headers_[method_offsets_index_];
+        uint32_t mapping_table_offset = method_header->mapping_table_offset_;
+        uint32_t vmap_table_offset = method_header->vmap_table_offset_;
+        // The code offset was 0 when the mapping/vmap table offset was set, so it's set
+        // to 0-offset and we need to adjust it by code_offset.
+        uint32_t code_offset = quick_code_offset - thumb_offset;
+        if (mapping_table_offset != 0u) {
+          mapping_table_offset += code_offset;
+          DCHECK_LT(mapping_table_offset, code_offset);
+        }
+        if (vmap_table_offset != 0u) {
+          vmap_table_offset += code_offset;
+          DCHECK_LT(vmap_table_offset, code_offset);
+        }
+        uint32_t frame_size_in_bytes = compiled_method->GetFrameSizeInBytes();
+        uint32_t core_spill_mask = compiled_method->GetCoreSpillMask();
+        uint32_t fp_spill_mask = compiled_method->GetFpSpillMask();
+        *method_header = OatQuickMethodHeader(mapping_table_offset, vmap_table_offset,
+                                              frame_size_in_bytes, core_spill_mask, fp_spill_mask,
+                                              code_size);
+
+        // Update checksum if this wasn't a duplicate.
+        if (code_iter == dedupe_map_.end()) {
           writer_->oat_header_->UpdateChecksum(method_header, sizeof(*method_header));
           offset_ += sizeof(*method_header);  // Method header is prepended before code.
           writer_->oat_header_->UpdateChecksum(&(*quick_code)[0], code_size);
           offset_ += code_size;
         }
       }
-      frame_size_in_bytes = compiled_method->GetFrameSizeInBytes();
-      core_spill_mask = compiled_method->GetCoreSpillMask();
-      fp_spill_mask = compiled_method->GetFpSpillMask();
 
       if (kIsDebugBuild) {
         // We expect GC maps except when the class hasn't been verified or the method is native.
@@ -421,9 +436,6 @@
       DCHECK_LT(method_offsets_index_, oat_class->method_offsets_.size());
       OatMethodOffsets* offsets = &oat_class->method_offsets_[method_offsets_index_];
       offsets->code_offset_ = quick_code_offset;
-      offsets->frame_size_in_bytes_ = frame_size_in_bytes;
-      offsets->core_spill_mask_ = core_spill_mask;
-      offsets->fp_spill_mask_ = fp_spill_mask;
       ++method_offsets_index_;
     }
 
@@ -431,19 +443,6 @@
   }
 
  private:
-  static void FixupMethodHeader(OatMethodHeader* method_header, uint32_t code_offset) {
-    // The code offset was 0 when the mapping/vmap table offset was set, so it's set
-    // to 0-offset and we need to adjust it by code_offset.
-    if (method_header->mapping_table_offset_ != 0u) {
-      method_header->mapping_table_offset_ += code_offset;
-      DCHECK_LT(method_header->mapping_table_offset_, code_offset);
-    }
-    if (method_header->vmap_table_offset_ != 0u) {
-      method_header->vmap_table_offset_ += code_offset;
-      DCHECK_LT(method_header->vmap_table_offset_, code_offset);
-    }
-  }
-
   // Deduplication is already done on a pointer basis by the compiler driver,
   // so we can simply compare the pointers to find out if things are duplicated.
   SafeMap<const CompiledMethod*, uint32_t, CodeOffsetsKeyComparator> dedupe_map_;
@@ -501,55 +500,24 @@
     OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
     CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
-    OatMethodOffsets offsets(0u, kStackAlignment, 0u, 0u, 0u);
+    OatMethodOffsets offsets(0u, 0u);
     if (compiled_method != nullptr) {
       DCHECK_LT(method_offsets_index_, oat_class->method_offsets_.size());
       offsets = oat_class->method_offsets_[method_offsets_index_];
       ++method_offsets_index_;
     }
 
-    // Derive frame size and spill masks for native methods without code:
-    // These are generic JNI methods...
-    uint32_t method_idx = it.GetMemberIndex();
-    bool is_native = (it.GetMemberAccessFlags() & kAccNative) != 0;
-    if (is_native && compiled_method == nullptr) {
-      // Compute Sirt size as putting _every_ reference into it, even null ones.
-      uint32_t s_len;
-      const char* shorty = dex_file_->GetMethodShorty(dex_file_->GetMethodId(method_idx),
-                                                      &s_len);
-      DCHECK(shorty != nullptr);
-      uint32_t refs = 1;    // Native method always has "this" or class.
-      for (uint32_t i = 1; i < s_len; ++i) {
-        if (shorty[i] == 'L') {
-          refs++;
-        }
-      }
-      size_t pointer_size = GetInstructionSetPointerSize(
-          writer_->compiler_driver_->GetInstructionSet());
-      size_t sirt_size = StackIndirectReferenceTable::GetAlignedSirtSizeTarget(pointer_size, refs);
-
-      // Get the generic spill masks and base frame size.
-      mirror::ArtMethod* callee_save_method =
-          Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
-
-      offsets.frame_size_in_bytes_ = callee_save_method->GetFrameSizeInBytes() + sirt_size;
-      offsets.core_spill_mask_ = callee_save_method->GetCoreSpillMask();
-      offsets.fp_spill_mask_ = callee_save_method->GetFpSpillMask();
-      DCHECK_EQ(offsets.gc_map_offset_, 0u);
-    }
-
     ClassLinker* linker = Runtime::Current()->GetClassLinker();
     InvokeType invoke_type = it.GetMethodInvokeType(dex_file_->GetClassDef(class_def_index_));
     // Unchecked as we hold mutator_lock_ on entry.
     ScopedObjectAccessUnchecked soa(Thread::Current());
-    SirtRef<mirror::DexCache> dex_cache(soa.Self(), linker->FindDexCache(*dex_file_));
-    SirtRef<mirror::ClassLoader> class_loader(soa.Self(), nullptr);
-    mirror::ArtMethod* method = linker->ResolveMethod(*dex_file_, method_idx, dex_cache,
-                                                      class_loader, nullptr, invoke_type);
+    StackHandleScope<2> hs(soa.Self());
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(*dex_file_)));
+    mirror::ArtMethod* method = linker->ResolveMethod(*dex_file_, it.GetMemberIndex(), dex_cache,
+                                                      NullHandle<mirror::ClassLoader>(),
+                                                      NullHandle<mirror::ArtMethod>(),
+                                                      invoke_type);
     CHECK(method != NULL);
-    method->SetFrameSizeInBytes(offsets.frame_size_in_bytes_);
-    method->SetCoreSpillMask(offsets.core_spill_mask_);
-    method->SetFpSpillMask(offsets.fp_spill_mask_);
     // Portable code offsets are set by ElfWriterMclinker::FixupCompiledCodeOffset after linking.
     method->SetQuickOatCodeOffset(offsets.code_offset_);
     method->SetOatNativeGcMapOffset(offsets.gc_map_offset_);
@@ -601,10 +569,10 @@
         // Deduplicate code arrays.
         const OatMethodOffsets& method_offsets = oat_class->method_offsets_[method_offsets_index_];
         DCHECK(method_offsets.code_offset_ < offset_ || method_offsets.code_offset_ ==
-                   offset_ + sizeof(OatMethodHeader) + compiled_method->CodeDelta())
+                   offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta())
             << PrettyMethod(it.GetMemberIndex(), *dex_file_);
         if (method_offsets.code_offset_ >= offset_) {
-          const OatMethodHeader& method_header = oat_class->method_headers_[method_offsets_index_];
+          const OatQuickMethodHeader& method_header = oat_class->method_headers_[method_offsets_index_];
           if (!out->WriteFully(&method_header, sizeof(method_header))) {
             ReportWriteFailure("method header", it);
             return false;
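
The oat_writer.cc rework above folds the old FixupMethodHeader step into the point where the final code offset becomes known: mapping and vmap table offsets are recorded while the code offset is still 0, then rebased by code_offset, and the frame size and spill masks now travel in OatQuickMethodHeader instead of OatMethodOffsets. The record-relative-then-rebase pattern in isolation, with illustrative names rather than the OatWriter types:

#include <cassert>
#include <cstdint>

// Offsets are recorded relative to a base that is still 0 at record time, then
// rebased once the final base (here, the code offset in the file) is known.
// A zero offset means "absent" and must not be rebased.
struct MethodHeaderSketch {
  uint32_t mapping_table_offset = 0;
  uint32_t vmap_table_offset = 0;
};

inline void Rebase(MethodHeaderSketch* header, uint32_t base) {
  if (header->mapping_table_offset != 0u) {
    header->mapping_table_offset += base;
  }
  if (header->vmap_table_offset != 0u) {
    header->vmap_table_offset += base;
  }
}

int main() {
  MethodHeaderSketch header;
  header.mapping_table_offset = 16;  // relative while the base is unknown
  header.vmap_table_offset = 0;      // absent, stays 0
  Rebase(&header, 4096);             // final base becomes known
  assert(header.mapping_table_offset == 4112);
  assert(header.vmap_table_offset == 0);
  return 0;
}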
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 7cdd532..8c20aa8 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -18,15 +18,14 @@
 #define ART_COMPILER_OAT_WRITER_H_
 
 #include <stdint.h>
-
 #include <cstddef>
+#include <memory>
 
 #include "driver/compiler_driver.h"
 #include "mem_map.h"
 #include "oat.h"
 #include "mirror/class.h"
 #include "safe_map.h"
-#include "UniquePtr.h"
 
 namespace art {
 
@@ -231,7 +230,7 @@
     // oat_method_offsets_offsets_from_oat_class_ should contain 0
     // values in this case).
     std::vector<OatMethodOffsets> method_offsets_;
-    std::vector<OatMethodHeader> method_headers_;
+    std::vector<OatQuickMethodHeader> method_headers_;
 
    private:
     DISALLOW_COPY_AND_ASSIGN(OatClass);
@@ -256,16 +255,16 @@
   OatHeader* oat_header_;
   std::vector<OatDexFile*> oat_dex_files_;
   std::vector<OatClass*> oat_classes_;
-  UniquePtr<const std::vector<uint8_t> > interpreter_to_interpreter_bridge_;
-  UniquePtr<const std::vector<uint8_t> > interpreter_to_compiled_code_bridge_;
-  UniquePtr<const std::vector<uint8_t> > jni_dlsym_lookup_;
-  UniquePtr<const std::vector<uint8_t> > portable_imt_conflict_trampoline_;
-  UniquePtr<const std::vector<uint8_t> > portable_resolution_trampoline_;
-  UniquePtr<const std::vector<uint8_t> > portable_to_interpreter_bridge_;
-  UniquePtr<const std::vector<uint8_t> > quick_generic_jni_trampoline_;
-  UniquePtr<const std::vector<uint8_t> > quick_imt_conflict_trampoline_;
-  UniquePtr<const std::vector<uint8_t> > quick_resolution_trampoline_;
-  UniquePtr<const std::vector<uint8_t> > quick_to_interpreter_bridge_;
+  std::unique_ptr<const std::vector<uint8_t>> interpreter_to_interpreter_bridge_;
+  std::unique_ptr<const std::vector<uint8_t>> interpreter_to_compiled_code_bridge_;
+  std::unique_ptr<const std::vector<uint8_t>> jni_dlsym_lookup_;
+  std::unique_ptr<const std::vector<uint8_t>> portable_imt_conflict_trampoline_;
+  std::unique_ptr<const std::vector<uint8_t>> portable_resolution_trampoline_;
+  std::unique_ptr<const std::vector<uint8_t>> portable_to_interpreter_bridge_;
+  std::unique_ptr<const std::vector<uint8_t>> quick_generic_jni_trampoline_;
+  std::unique_ptr<const std::vector<uint8_t>> quick_imt_conflict_trampoline_;
+  std::unique_ptr<const std::vector<uint8_t>> quick_resolution_trampoline_;
+  std::unique_ptr<const std::vector<uint8_t>> quick_to_interpreter_bridge_;
 
   // output stats
   uint32_t size_dex_file_alignment_;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 1efdd38..521992a 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -311,6 +311,10 @@
     }
   }
 
+  if (return_type == Primitive::kPrimDouble || return_type == Primitive::kPrimFloat) {
+    return false;
+  }
+
   DCHECK_EQ(argument_index, number_of_arguments);
   current_block_->AddInstruction(invoke);
   return true;
@@ -409,7 +413,7 @@
       uint32_t method_idx = instruction.VRegB_35c();
       uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
       uint32_t args[5];
-      instruction.GetArgs(args);
+      instruction.GetVarArgs(args);
       if (!BuildInvoke(instruction, dex_offset, method_idx, number_of_vreg_arguments, false, args, -1)) {
         return false;
       }
@@ -495,6 +499,7 @@
       break;
     }
 
+    case Instruction::MOVE_RESULT:
     case Instruction::MOVE_RESULT_WIDE: {
       UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
       break;
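
Two builder.cc changes above concern invoke results: the graph builder now returns false (bails out) when the callee returns a float or double, and MOVE_RESULT is handled like MOVE_RESULT_WIDE by binding the destination vreg to the instruction that was just emitted, without creating a new one. A toy sketch of that binding; Instruction, BlockSketch and the locals map are stand-ins for the builder's bookkeeping:

#include <cassert>
#include <cstdint>
#include <deque>
#include <unordered_map>

// Toy instruction node; the real builder uses HInstruction subclasses.
struct Instruction {
  const char* kind;
};

class BlockSketch {
 public:
  Instruction* AddInstruction(const char* kind) {
    instructions_.push_back(Instruction{kind});
    return &instructions_.back();
  }
  Instruction* GetLastInstruction() { return &instructions_.back(); }
 private:
  std::deque<Instruction> instructions_;  // deque: pointers stay valid as we append
};

int main() {
  BlockSketch block;
  std::unordered_map<uint32_t, Instruction*> locals;  // vreg -> defining instruction

  // An invoke produces a value...
  Instruction* invoke = block.AddInstruction("invoke");
  // ...and the following move-result vA just records that vA now holds the
  // last emitted instruction's result; no new instruction is emitted.
  uint32_t vreg_a = 0;
  locals[vreg_a] = block.GetLastInstruction();

  assert(locals[vreg_a] == invoke);
  return 0;
}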
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index ff316e5..beafbcc 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -30,12 +30,12 @@
 namespace art {
 
 void CodeGenerator::Compile(CodeAllocator* allocator) {
-  const GrowableArray<HBasicBlock*>* blocks = GetGraph()->GetBlocks();
-  DCHECK(blocks->Get(0) == GetGraph()->GetEntryBlock());
-  DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), blocks->Get(1)));
+  const GrowableArray<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
+  DCHECK(blocks.Get(0) == GetGraph()->GetEntryBlock());
+  DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), blocks.Get(1)));
   GenerateFrameEntry();
-  for (size_t i = 0; i < blocks->Size(); i++) {
-    CompileBlock(blocks->Get(i));
+  for (size_t i = 0, e = blocks.Size(); i < e; ++i) {
+    CompileBlock(blocks.Get(i));
   }
   size_t code_size = GetAssembler()->CodeSize();
   uint8_t* buffer = allocator->Allocate(code_size);
@@ -47,7 +47,7 @@
   Bind(GetLabelOf(block));
   HGraphVisitor* location_builder = GetLocationBuilder();
   HGraphVisitor* instruction_visitor = GetInstructionVisitor();
-  for (HInstructionIterator it(*block->GetInstructions()); !it.Done(); it.Advance()) {
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
     HInstruction* current = it.Current();
     current->Accept(location_builder);
     InitLocations(current);
@@ -132,6 +132,12 @@
     }
   }
 
+  // Make all registers available for the return value.
+  for (size_t i = 0, e = GetNumberOfRegisters(); i < e; ++i) {
+    blocked_registers_[i] = false;
+  }
+  SetupBlockedRegisters(blocked_registers_);
+
   Location result_location = locations->Out();
   if (result_location.IsUnallocated()) {
     switch (result_location.GetPolicy()) {
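
The InitLocations change above clears blocked_registers_ and re-runs SetupBlockedRegisters before choosing the output location, so registers handed to the inputs become available again for the result. A compact sketch of that reset-then-reapply step; the register indices and the AllocateFreeRegister helper are illustrative:

#include <cassert>
#include <cstddef>

constexpr size_t kNumberOfRegisters = 4;

// Registers that are never allocatable (e.g. the stack pointer), re-applied after each reset.
void SetupBlockedRegisters(bool* blocked) {
  blocked[0] = true;  // pretend register 0 is reserved
}

// Return the first register not currently blocked, marking it blocked.
int AllocateFreeRegister(bool* blocked) {
  for (size_t i = 0; i < kNumberOfRegisters; ++i) {
    if (!blocked[i]) {
      blocked[i] = true;
      return static_cast<int>(i);
    }
  }
  return -1;
}

int main() {
  bool blocked[kNumberOfRegisters] = {};
  SetupBlockedRegisters(blocked);

  // Inputs grab registers first...
  int input0 = AllocateFreeRegister(blocked);
  int input1 = AllocateFreeRegister(blocked);

  // ...then every register is made available again for the output, so the
  // result may legitimately reuse an input's register.
  for (size_t i = 0; i < kNumberOfRegisters; ++i) {
    blocked[i] = false;
  }
  SetupBlockedRegisters(blocked);
  int output = AllocateFreeRegister(blocked);

  assert(input0 == 1 && input1 == 2);
  assert(output == 1);  // reuses the first input's register
  return 0;
}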
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 74cbccc..e18902f 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -20,6 +20,7 @@
 #include "base/bit_field.h"
 #include "globals.h"
 #include "instruction_set.h"
+#include "locations.h"
 #include "memory_region.h"
 #include "nodes.h"
 #include "utils/assembler.h"
@@ -46,267 +47,6 @@
   uintptr_t native_pc;
 };
 
-/**
- * A Location is an abstraction over the potential location
- * of an instruction. It could be in register or stack.
- */
-class Location : public ValueObject {
- public:
-  enum Kind {
-    kInvalid = 0,
-    kStackSlot = 1,  // Word size slot.
-    kDoubleStackSlot = 2,  // 64bit stack slot.
-    kRegister = 3,
-    // On 32bits architectures, quick can pass a long where the
-    // low bits are in the last parameter register, and the high
-    // bits are in a stack slot. The kQuickParameter kind is for
-    // handling this special case.
-    kQuickParameter = 4,
-
-    // Unallocated location represents a location that is not fixed and can be
-    // allocated by a register allocator.  Each unallocated location has
-    // a policy that specifies what kind of location is suitable. Payload
-    // contains register allocation policy.
-    kUnallocated = 5,
-  };
-
-  Location() : value_(kInvalid) {
-    DCHECK(!IsValid());
-  }
-
-  Location(const Location& other) : ValueObject(), value_(other.value_) {}
-
-  Location& operator=(const Location& other) {
-    value_ = other.value_;
-    return *this;
-  }
-
-  bool IsValid() const {
-    return value_ != kInvalid;
-  }
-
-  // Register locations.
-  static Location RegisterLocation(ManagedRegister reg) {
-    return Location(kRegister, reg.RegId());
-  }
-
-  bool IsRegister() const {
-    return GetKind() == kRegister;
-  }
-
-  ManagedRegister reg() const {
-    DCHECK(IsRegister());
-    return static_cast<ManagedRegister>(GetPayload());
-  }
-
-  static uword EncodeStackIndex(intptr_t stack_index) {
-    DCHECK(-kStackIndexBias <= stack_index);
-    DCHECK(stack_index < kStackIndexBias);
-    return static_cast<uword>(kStackIndexBias + stack_index);
-  }
-
-  static Location StackSlot(intptr_t stack_index) {
-    uword payload = EncodeStackIndex(stack_index);
-    Location loc(kStackSlot, payload);
-    // Ensure that sign is preserved.
-    DCHECK_EQ(loc.GetStackIndex(), stack_index);
-    return loc;
-  }
-
-  bool IsStackSlot() const {
-    return GetKind() == kStackSlot;
-  }
-
-  static Location DoubleStackSlot(intptr_t stack_index) {
-    uword payload = EncodeStackIndex(stack_index);
-    Location loc(kDoubleStackSlot, payload);
-    // Ensure that sign is preserved.
-    DCHECK_EQ(loc.GetStackIndex(), stack_index);
-    return loc;
-  }
-
-  bool IsDoubleStackSlot() const {
-    return GetKind() == kDoubleStackSlot;
-  }
-
-  intptr_t GetStackIndex() const {
-    DCHECK(IsStackSlot() || IsDoubleStackSlot());
-    // Decode stack index manually to preserve sign.
-    return GetPayload() - kStackIndexBias;
-  }
-
-  intptr_t GetHighStackIndex(uintptr_t word_size) const {
-    DCHECK(IsDoubleStackSlot());
-    // Decode stack index manually to preserve sign.
-    return GetPayload() - kStackIndexBias + word_size;
-  }
-
-  static Location QuickParameter(uint32_t parameter_index) {
-    return Location(kQuickParameter, parameter_index);
-  }
-
-  uint32_t GetQuickParameterIndex() const {
-    DCHECK(IsQuickParameter());
-    return GetPayload();
-  }
-
-  bool IsQuickParameter() const {
-    return GetKind() == kQuickParameter;
-  }
-
-  arm::ArmManagedRegister AsArm() const;
-  x86::X86ManagedRegister AsX86() const;
-
-  Kind GetKind() const {
-    return KindField::Decode(value_);
-  }
-
-  bool Equals(Location other) const {
-    return value_ == other.value_;
-  }
-
-  const char* DebugString() const {
-    switch (GetKind()) {
-      case kInvalid: return "?";
-      case kRegister: return "R";
-      case kStackSlot: return "S";
-      case kDoubleStackSlot: return "DS";
-      case kQuickParameter: return "Q";
-      case kUnallocated: return "U";
-    }
-    return "?";
-  }
-
-  // Unallocated locations.
-  enum Policy {
-    kAny,
-    kRequiresRegister,
-    kSameAsFirstInput,
-  };
-
-  bool IsUnallocated() const {
-    return GetKind() == kUnallocated;
-  }
-
-  static Location UnallocatedLocation(Policy policy) {
-    return Location(kUnallocated, PolicyField::Encode(policy));
-  }
-
-  // Any free register is suitable to replace this unallocated location.
-  static Location Any() {
-    return UnallocatedLocation(kAny);
-  }
-
-  static Location RequiresRegister() {
-    return UnallocatedLocation(kRequiresRegister);
-  }
-
-  // The location of the first input to the instruction will be
-  // used to replace this unallocated location.
-  static Location SameAsFirstInput() {
-    return UnallocatedLocation(kSameAsFirstInput);
-  }
-
-  Policy GetPolicy() const {
-    DCHECK(IsUnallocated());
-    return PolicyField::Decode(GetPayload());
-  }
-
-  uword GetEncoding() const {
-    return GetPayload();
-  }
-
- private:
-  // Number of bits required to encode Kind value.
-  static constexpr uint32_t kBitsForKind = 4;
-  static constexpr uint32_t kBitsForPayload = kWordSize * kBitsPerByte - kBitsForKind;
-
-  explicit Location(uword value) : value_(value) {}
-
-  Location(Kind kind, uword payload)
-      : value_(KindField::Encode(kind) | PayloadField::Encode(payload)) {}
-
-  uword GetPayload() const {
-    return PayloadField::Decode(value_);
-  }
-
-  typedef BitField<Kind, 0, kBitsForKind> KindField;
-  typedef BitField<uword, kBitsForKind, kBitsForPayload> PayloadField;
-
-  // Layout for kUnallocated locations payload.
-  typedef BitField<Policy, 0, 3> PolicyField;
-
-  // Layout for stack slots.
-  static const intptr_t kStackIndexBias =
-      static_cast<intptr_t>(1) << (kBitsForPayload - 1);
-
-  // Location either contains kind and payload fields or a tagged handle for
-  // a constant locations. Values of enumeration Kind are selected in such a
-  // way that none of them can be interpreted as a kConstant tag.
-  uword value_;
-};
-
-/**
- * The code generator computes LocationSummary for each instruction so that
- * the instruction itself knows what code to generate: where to find the inputs
- * and where to place the result.
- *
- * The intent is to have the code for generating the instruction independent of
- * register allocation. A register allocator just has to provide a LocationSummary.
- */
-class LocationSummary : public ArenaObject {
- public:
-  explicit LocationSummary(HInstruction* instruction)
-      : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()),
-        temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0) {
-    inputs_.SetSize(instruction->InputCount());
-    for (size_t i = 0; i < instruction->InputCount(); i++) {
-      inputs_.Put(i, Location());
-    }
-  }
-
-  void SetInAt(uint32_t at, Location location) {
-    inputs_.Put(at, location);
-  }
-
-  Location InAt(uint32_t at) const {
-    return inputs_.Get(at);
-  }
-
-  size_t GetInputCount() const {
-    return inputs_.Size();
-  }
-
-  void SetOut(Location location) {
-    output_ = Location(location);
-  }
-
-  void AddTemp(Location location) {
-    temps_.Add(location);
-  }
-
-  Location GetTemp(uint32_t at) const {
-    return temps_.Get(at);
-  }
-
-  void SetTempAt(uint32_t at, Location location) {
-    temps_.Put(at, location);
-  }
-
-  size_t GetTempCount() const {
-    return temps_.Size();
-  }
-
-  Location Out() const { return output_; }
-
- private:
-  GrowableArray<Location> inputs_;
-  GrowableArray<Location> temps_;
-  Location output_;
-
-  DISALLOW_COPY_AND_ASSIGN(LocationSummary);
-};
-
 class CodeGenerator : public ArenaObject {
  public:
   // Compiles the graph to executable instructions. Returns whether the compilation
@@ -354,7 +94,7 @@
         pc_infos_(graph->GetArena(), 32),
         blocked_registers_(static_cast<bool*>(
             graph->GetArena()->Alloc(number_of_registers * sizeof(bool), kArenaAllocData))) {
-    block_labels_.SetSize(graph->GetBlocks()->Size());
+    block_labels_.SetSize(graph->GetBlocks().Size());
   }
   ~CodeGenerator() { }
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index be51232..f1b16a1 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -793,5 +793,13 @@
   LOG(FATAL) << "Unimplemented";
 }
 
+void LocationsBuilderARM::VisitParallelMove(HParallelMove* instruction) {
+  LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorARM::VisitParallelMove(HParallelMove* instruction) {
+  LOG(FATAL) << "Unimplemented";
+}
+
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index e4f95c7..b8b25f9 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -813,5 +813,13 @@
   LOG(FATAL) << "Unimplemented";
 }
 
+void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction) {
+  LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
+  LOG(FATAL) << "Unimplemented";
+}
+
 }  // namespace x86
 }  // namespace art
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index d40990e..7684bb1 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -42,7 +42,7 @@
 
  private:
   size_t size_;
-  UniquePtr<uint8_t[]> memory_;
+  std::unique_ptr<uint8_t[]> memory_;
 
   DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
 };
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 1c30b79..3062e37 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -32,13 +32,13 @@
   HGraph* graph = builder.BuildGraph(*item);
   ASSERT_NE(graph, nullptr);
   graph->BuildDominatorTree();
-  ASSERT_EQ(graph->GetBlocks()->Size(), blocks_length);
-  for (size_t i = 0; i < blocks_length; i++) {
+  ASSERT_EQ(graph->GetBlocks().Size(), blocks_length);
+  for (size_t i = 0, e = blocks_length; i < e; ++i) {
     if (blocks[i] == -1) {
-      ASSERT_EQ(nullptr, graph->GetBlocks()->Get(i)->GetDominator());
+      ASSERT_EQ(nullptr, graph->GetBlocks().Get(i)->GetDominator());
     } else {
-      ASSERT_NE(nullptr, graph->GetBlocks()->Get(i)->GetDominator());
-      ASSERT_EQ(blocks[i], graph->GetBlocks()->Get(i)->GetDominator()->GetBlockId());
+      ASSERT_NE(nullptr, graph->GetBlocks().Get(i)->GetDominator());
+      ASSERT_EQ(blocks[i], graph->GetBlocks().Get(i)->GetDominator()->GetBlockId());
     }
   }
 }
@@ -167,7 +167,8 @@
     0,
     1,
     1,
-    3
+    3,
+    1,  // Synthesized block to avoid critical edge.
   };
 
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
@@ -185,7 +186,9 @@
     0,
     1,
     1,
-    -1  // exit block is not dominated by any block due to the spin loop.
+    -1,  // exit block is not dominated by any block due to the spin loop.
+    1,   // block to avoid critical edge.
+    1    // block to avoid critical edge.
   };
 
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
@@ -205,7 +208,8 @@
     1,
     1,
     1,
-    -1  // exit block is not dominated by any block due to the spin loop.
+    -1,  // exit block is not dominated by any block due to the spin loop.
+    1    // block to avoid critical edge.
   };
 
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
@@ -225,7 +229,8 @@
     1,
     1,
     1,
-    -1  // exit block is not dominated by any block due to the spin loop.
+    -1,  // exit block is not dominated by any block due to the spin loop.
+    1    // block to avoid critical edge.
   };
 
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
@@ -247,7 +252,9 @@
     2,
     2,
     1,
-    5  // Block number 5 dominates exit block
+    5,    // Block number 5 dominates exit block
+    1,    // block to avoid critical edge.
+    2     // block to avoid critical edge.
   };
 
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
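
The extra entries in the expected dominator arrays above correspond to blocks the builder now synthesizes to split critical edges, that is, edges whose source has several successors and whose destination has several predecessors. The splitting rule itself is small; a sketch over a toy CFG follows, where Block and SplitCriticalEdges are illustrative rather than the HGraph API:

#include <cassert>
#include <memory>
#include <vector>

struct Block {
  std::vector<Block*> successors;
  std::vector<Block*> predecessors;
};

void Link(Block* from, Block* to) {
  from->successors.push_back(to);
  to->predecessors.push_back(from);
}

// Insert a fresh block on every edge whose source branches and whose
// destination merges, so later phases can place code on that edge alone.
void SplitCriticalEdges(std::vector<std::unique_ptr<Block>>* blocks) {
  size_t original_count = blocks->size();
  for (size_t b = 0; b < original_count; ++b) {
    Block* source = (*blocks)[b].get();
    if (source->successors.size() < 2) continue;
    for (Block*& destination : source->successors) {
      if (destination->predecessors.size() < 2) continue;
      blocks->push_back(std::unique_ptr<Block>(new Block()));
      Block* split = blocks->back().get();
      // Rewire source -> split -> destination.
      for (Block*& pred : destination->predecessors) {
        if (pred == source) pred = split;
      }
      split->predecessors.push_back(source);
      split->successors.push_back(destination);
      destination = split;
    }
  }
}

int main() {
  // Diamond where one edge (branch -> merge) is critical:
  //   branch -> merge   and   branch -> side -> merge
  std::vector<std::unique_ptr<Block>> blocks;
  for (int i = 0; i < 3; ++i) blocks.push_back(std::unique_ptr<Block>(new Block()));
  Block* branch = blocks[0].get();
  Block* side = blocks[1].get();
  Block* merge = blocks[2].get();
  Link(branch, merge);
  Link(branch, side);
  Link(side, merge);

  SplitCriticalEdges(&blocks);
  assert(blocks.size() == 4);               // one synthesized block
  assert(merge->predecessors.size() == 2);  // still two predecessors
  assert(branch->successors[0] != merge);   // edge now goes through the split block
  return 0;
}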
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
new file mode 100644
index 0000000..fab9f7a
--- /dev/null
+++ b/compiler/optimizing/find_loops_test.cc
@@ -0,0 +1,362 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "builder.h"
+#include "dex_file.h"
+#include "dex_instruction.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "ssa_liveness_analysis.h"
+#include "utils/arena_allocator.h"
+#include "pretty_printer.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+static HGraph* TestCode(const uint16_t* data, ArenaPool* pool) {
+  ArenaAllocator allocator(pool);
+  HGraphBuilder builder(&allocator);
+  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
+  HGraph* graph = builder.BuildGraph(*item);
+  graph->BuildDominatorTree();
+  graph->FindNaturalLoops();
+  return graph;
+}
+
+TEST(FindLoopsTest, CFG1) {
+  // Constant is not used.
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::RETURN_VOID);
+
+  ArenaPool arena;
+  HGraph* graph = TestCode(data, &arena);
+  for (size_t i = 0, e = graph->GetBlocks().Size(); i < e; ++i) {
+    ASSERT_EQ(graph->GetBlocks().Get(i)->GetLoopInformation(), nullptr);
+  }
+}
+
+TEST(FindLoopsTest, CFG2) {
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::RETURN);
+
+  ArenaPool arena;
+  HGraph* graph = TestCode(data, &arena);
+  for (size_t i = 0, e = graph->GetBlocks().Size(); i < e; ++i) {
+    ASSERT_EQ(graph->GetBlocks().Get(i)->GetLoopInformation(), nullptr);
+  }
+}
+
+TEST(FindLoopsTest, CFG3) {
+  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 3 << 12 | 0,
+    Instruction::CONST_4 | 4 << 12 | 1 << 8,
+    Instruction::ADD_INT_2ADDR | 1 << 12,
+    Instruction::GOTO | 0x100,
+    Instruction::RETURN);
+
+  ArenaPool arena;
+  HGraph* graph = TestCode(data, &arena);
+  for (size_t i = 0, e = graph->GetBlocks().Size(); i < e; ++i) {
+    ASSERT_EQ(graph->GetBlocks().Get(i)->GetLoopInformation(), nullptr);
+  }
+}
+
+TEST(FindLoopsTest, CFG4) {
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 4,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::GOTO | 0x200,
+    Instruction::CONST_4 | 5 << 12 | 0,
+    Instruction::RETURN | 0 << 8);
+
+  ArenaPool arena;
+  HGraph* graph = TestCode(data, &arena);
+  for (size_t i = 0, e = graph->GetBlocks().Size(); i < e; ++i) {
+    ASSERT_EQ(graph->GetBlocks().Get(i)->GetLoopInformation(), nullptr);
+  }
+}
+
+TEST(FindLoopsTest, CFG5) {
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::RETURN | 0 << 8);
+
+  ArenaPool arena;
+  HGraph* graph = TestCode(data, &arena);
+  for (size_t i = 0, e = graph->GetBlocks().Size(); i < e; ++i) {
+    ASSERT_EQ(graph->GetBlocks().Get(i)->GetLoopInformation(), nullptr);
+  }
+}
+
+static void TestBlock(HGraph* graph,
+                      int block_id,
+                      bool is_loop_header,
+                      int parent_loop_header_id,
+                      const int* blocks_in_loop = nullptr,
+                      size_t number_of_blocks = 0) {
+  HBasicBlock* block = graph->GetBlocks().Get(block_id);
+  ASSERT_EQ(block->IsLoopHeader(), is_loop_header);
+  if (parent_loop_header_id == -1) {
+    ASSERT_EQ(block->GetLoopInformation(), nullptr);
+  } else {
+    ASSERT_EQ(block->GetLoopInformation()->GetHeader()->GetBlockId(), parent_loop_header_id);
+  }
+
+  if (blocks_in_loop != nullptr) {
+    HLoopInformation* info = block->GetLoopInformation();
+    const BitVector& blocks = info->GetBlocks();
+    ASSERT_EQ(blocks.NumSetBits(), number_of_blocks);
+    for (size_t i = 0; i < number_of_blocks; ++i) {
+      ASSERT_TRUE(blocks.IsBitSet(blocks_in_loop[i]));
+    }
+  } else {
+    ASSERT_FALSE(block->IsLoopHeader());
+  }
+}
+
+TEST(FindLoopsTest, Loop1) {
+  // Simple loop with one preheader and one back edge.
+  // var a = 0;
+  // while (a == a) {
+  // }
+  // return;
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0xFE00,
+    Instruction::RETURN_VOID);
+
+  ArenaPool arena;
+  HGraph* graph = TestCode(data, &arena);
+
+  TestBlock(graph, 0, false, -1);            // entry block
+  TestBlock(graph, 1, false, -1);            // pre header
+  const int blocks2[] = {2, 3};
+  TestBlock(graph, 2, true, 2, blocks2, 2);  // loop header
+  TestBlock(graph, 3, false, 2);             // block in loop
+  TestBlock(graph, 4, false, -1);            // return block
+  TestBlock(graph, 5, false, -1);            // exit block
+}
+
+TEST(FindLoopsTest, Loop2) {
+  // Make sure we support a preheader of a loop not being the first predecessor
+  // in the predecessor list of the header.
+  // var a = 0;
+  // while (a == a) {
+  // }
+  // return a;
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::GOTO | 0x400,
+    Instruction::IF_EQ, 4,
+    Instruction::GOTO | 0xFE00,
+    Instruction::GOTO | 0xFD00,
+    Instruction::RETURN | 0 << 8);
+
+  ArenaPool arena;
+  HGraph* graph = TestCode(data, &arena);
+
+  TestBlock(graph, 0, false, -1);            // entry block
+  TestBlock(graph, 1, false, -1);            // goto block
+  const int blocks2[] = {2, 3};
+  TestBlock(graph, 2, true, 2, blocks2, 2);  // loop header
+  TestBlock(graph, 3, false, 2);             // block in loop
+  TestBlock(graph, 4, false, -1);            // pre header
+  TestBlock(graph, 5, false, -1);            // return block
+  TestBlock(graph, 6, false, -1);            // exit block
+}
+
+TEST(FindLoopsTest, Loop3) {
+  // Make sure we create a preheader of a loop when a header originally has two
+  // incoming blocks and one back edge.
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0x100,
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0xFE00,
+    Instruction::RETURN | 0 << 8);
+
+  ArenaPool arena;
+  HGraph* graph = TestCode(data, &arena);
+
+  TestBlock(graph, 0, false, -1);            // entry block
+  TestBlock(graph, 1, false, -1);            // goto block
+  TestBlock(graph, 2, false, -1);
+  const int blocks2[] = {3, 4};
+  TestBlock(graph, 3, true, 3, blocks2, 2);  // loop header
+  TestBlock(graph, 4, false, 3);             // block in loop
+  TestBlock(graph, 5, false, -1);            // pre header
+  TestBlock(graph, 6, false, -1);            // return block
+  TestBlock(graph, 7, false, -1);            // exit block
+  TestBlock(graph, 8, false, -1);            // synthesized pre header
+}
+
+TEST(FindLoopsTest, Loop4) {
+  // Test loop with originally two back edges.
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 6,
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0xFC00,
+    Instruction::GOTO | 0xFB00,
+    Instruction::RETURN | 0 << 8);
+
+  ArenaPool arena;
+  HGraph* graph = TestCode(data, &arena);
+
+  TestBlock(graph, 0, false, -1);            // entry block
+  TestBlock(graph, 1, false, -1);            // pre header
+  const int blocks2[] = {2, 3, 4, 5, 8};
+  TestBlock(graph, 2, true, 2, blocks2, 5);  // loop header
+  TestBlock(graph, 3, false, 2);             // block in loop
+  TestBlock(graph, 4, false, 2);             // original back edge
+  TestBlock(graph, 5, false, 2);             // original back edge
+  TestBlock(graph, 6, false, -1);            // return block
+  TestBlock(graph, 7, false, -1);            // exit block
+  TestBlock(graph, 8, false, 2);             // synthesized back edge
+}
+
+
+TEST(FindLoopsTest, Loop5) {
+  // Test loop with two exit edges.
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 6,
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0x0200,
+    Instruction::GOTO | 0xFB00,
+    Instruction::RETURN | 0 << 8);
+
+  ArenaPool arena;
+  HGraph* graph = TestCode(data, &arena);
+
+  TestBlock(graph, 0, false, -1);            // entry block
+  TestBlock(graph, 1, false, -1);            // pre header
+  const int blocks2[] = {2, 3, 5};
+  TestBlock(graph, 2, true, 2, blocks2, 3);  // loop header
+  TestBlock(graph, 3, false, 2);             // block in loop
+  TestBlock(graph, 4, false, -1);            // loop exit
+  TestBlock(graph, 5, false, 2);             // back edge
+  TestBlock(graph, 6, false, -1);            // return block
+  TestBlock(graph, 7, false, -1);            // exit block
+  TestBlock(graph, 8, false, -1);            // synthesized block at the loop exit
+}
+
+TEST(FindLoopsTest, InnerLoop) {
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 6,
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0xFE00,  // inner loop
+    Instruction::GOTO | 0xFB00,
+    Instruction::RETURN | 0 << 8);
+
+  ArenaPool arena;
+  HGraph* graph = TestCode(data, &arena);
+
+  TestBlock(graph, 0, false, -1);            // entry block
+  TestBlock(graph, 1, false, -1);            // pre header of outer loop
+  const int blocks2[] = {2, 3, 4, 5, 8};
+  TestBlock(graph, 2, true, 2, blocks2, 5);  // outer loop header
+  const int blocks3[] = {3, 4};
+  TestBlock(graph, 3, true, 3, blocks3, 2);  // inner loop header
+  TestBlock(graph, 4, false, 3);             // back edge on inner loop
+  TestBlock(graph, 5, false, 2);             // back edge on outer loop
+  TestBlock(graph, 6, false, -1);            // return block
+  TestBlock(graph, 7, false, -1);            // exit block
+  TestBlock(graph, 8, false, 2);             // synthesized block as pre header of inner loop
+
+  ASSERT_TRUE(graph->GetBlocks().Get(3)->GetLoopInformation()->IsIn(
+                    *graph->GetBlocks().Get(2)->GetLoopInformation()));
+  ASSERT_FALSE(graph->GetBlocks().Get(2)->GetLoopInformation()->IsIn(
+                    *graph->GetBlocks().Get(3)->GetLoopInformation()));
+}
+
+TEST(FindLoopsTest, TwoLoops) {
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0xFE00,  // first loop
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0xFE00,  // second loop
+    Instruction::RETURN | 0 << 8);
+
+  ArenaPool arena;
+  HGraph* graph = TestCode(data, &arena);
+
+  TestBlock(graph, 0, false, -1);            // entry block
+  TestBlock(graph, 1, false, -1);            // pre header of first loop
+  const int blocks2[] = {2, 3};
+  TestBlock(graph, 2, true, 2, blocks2, 2);  // first loop header
+  TestBlock(graph, 3, false, 2);             // back edge of first loop
+  const int blocks4[] = {4, 5};
+  TestBlock(graph, 4, true, 4, blocks4, 2);  // second loop header
+  TestBlock(graph, 5, false, 4);             // back edge of second loop
+  TestBlock(graph, 6, false, -1);            // return block
+  TestBlock(graph, 7, false, -1);            // exit block
+
+  ASSERT_FALSE(graph->GetBlocks().Get(4)->GetLoopInformation()->IsIn(
+                    *graph->GetBlocks().Get(2)->GetLoopInformation()));
+  ASSERT_FALSE(graph->GetBlocks().Get(2)->GetLoopInformation()->IsIn(
+                    *graph->GetBlocks().Get(4)->GetLoopInformation()));
+}
+
+TEST(FindLoopsTest, NonNaturalLoop) {
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0x0100,
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0xFD00,
+    Instruction::RETURN | 0 << 8);
+
+  ArenaPool arena;
+  HGraph* graph = TestCode(data, &arena);
+  ASSERT_TRUE(graph->GetBlocks().Get(3)->IsLoopHeader());
+  HLoopInformation* info = graph->GetBlocks().Get(3)->GetLoopInformation();
+  ASSERT_FALSE(info->GetHeader()->Dominates(info->GetBackEdges().Get(0)));
+}
+
+TEST(FindLoopsTest, DoWhileLoop) {
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::GOTO | 0x0100,
+    Instruction::IF_EQ, 0xFFFF,
+    Instruction::RETURN | 0 << 8);
+
+  ArenaPool arena;
+  HGraph* graph = TestCode(data, &arena);
+
+  TestBlock(graph, 0, false, -1);            // entry block
+  TestBlock(graph, 1, false, -1);            // pre header of first loop
+  const int blocks2[] = {2, 3, 6};
+  TestBlock(graph, 2, true, 2, blocks2, 3);  // loop header
+  TestBlock(graph, 3, false, 2);             // back edge of first loop
+  TestBlock(graph, 4, false, -1);            // return block
+  TestBlock(graph, 5, false, -1);            // exit block
+  TestBlock(graph, 6, false, 2);             // synthesized block to avoid a critical edge
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
new file mode 100644
index 0000000..52e3e37
--- /dev/null
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "graph_visualizer.h"
+
+#include "driver/dex_compilation_unit.h"
+#include "nodes.h"
+#include "ssa_liveness_analysis.h"
+
+namespace art {
+
+/**
+ * HGraph visitor to generate a file suitable for the c1visualizer tool and IRHydra.
+ */
+class HGraphVisualizerPrinter : public HGraphVisitor {
+ public:
+  HGraphVisualizerPrinter(HGraph* graph, std::ostream& output)
+      : HGraphVisitor(graph), output_(output), indent_(0) {}
+
+  void StartTag(const char* name) {
+    AddIndent();
+    output_ << "begin_" << name << std::endl;
+    indent_++;
+  }
+
+  void EndTag(const char* name) {
+    indent_--;
+    AddIndent();
+    output_ << "end_" << name << std::endl;
+  }
+
+  void PrintProperty(const char* name, const char* property) {
+    AddIndent();
+    output_ << name << " \"" << property << "\"" << std::endl;
+  }
+
+  void PrintProperty(const char* name, const char* property, int id) {
+    AddIndent();
+    output_ << name << " \"" << property << id << "\"" << std::endl;
+  }
+
+  void PrintEmptyProperty(const char* name) {
+    AddIndent();
+    output_ << name << std::endl;
+  }
+
+  void PrintTime(const char* name) {
+    AddIndent();
+    output_ << name << " " << time(NULL) << std::endl;
+  }
+
+  void PrintInt(const char* name, int value) {
+    AddIndent();
+    output_ << name << " " << value << std::endl;
+  }
+
+  void AddIndent() {
+    for (size_t i = 0; i < indent_; ++i) {
+      output_ << "  ";
+    }
+  }
+
+  void PrintPredecessors(HBasicBlock* block) {
+    AddIndent();
+    output_ << "predecessors";
+    for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) {
+      HBasicBlock* predecessor = block->GetPredecessors().Get(i);
+      output_ << " \"B" << predecessor->GetBlockId() << "\" ";
+    }
+    output_ << std::endl;
+  }
+
+  void PrintSuccessors(HBasicBlock* block) {
+    AddIndent();
+    output_ << "successors";
+    for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) {
+      HBasicBlock* successor = block->GetSuccessors().Get(i);
+      output_ << " \"B" << successor->GetBlockId() << "\" ";
+    }
+    output_ << std::endl;
+  }
+
+  void VisitInstruction(HInstruction* instruction) {
+    output_ << instruction->DebugName();
+    if (instruction->InputCount() > 0) {
+      output_ << " [ ";
+      for (HInputIterator inputs(instruction); !inputs.Done(); inputs.Advance()) {
+        output_ << "v" << inputs.Current()->GetId() << " ";
+      }
+      output_ << "]";
+    }
+    if (instruction->GetLifetimePosition() != kNoLifetime) {
+      output_ << " (liveness: " << instruction->GetLifetimePosition();
+      if (instruction->HasLiveInterval()) {
+        output_ << " ";
+        const GrowableArray<LiveRange>& ranges = instruction->GetLiveInterval()->GetRanges();
+        size_t i = ranges.Size() - 1;
+        do {
+          output_ << "[" << ranges.Get(i).GetStart() << "," << ranges.Get(i).GetEnd() << "[";
+          if (i == 0) {
+            break;
+          } else {
+            --i;
+            output_ << ",";
+          }
+        } while (true);
+      }
+      output_ << ")";
+    }
+  }
+
+  void PrintInstructions(const HInstructionList& list) {
+    const char* kEndInstructionMarker = "<|@";
+    for (HInstructionIterator it(list); !it.Done(); it.Advance()) {
+      HInstruction* instruction = it.Current();
+      AddIndent();
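+      // Bytecode index is not tracked here; emit 0 as a placeholder expected by
+      // the c1visualizer output format.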
+      int bci = 0;
+      output_ << bci << " " << instruction->NumberOfUses() << " v" << instruction->GetId() << " ";
+      instruction->Accept(this);
+      output_ << kEndInstructionMarker << std::endl;
+    }
+  }
+
+  void Run(const char* pass_name) {
+    StartTag("cfg");
+    PrintProperty("name", pass_name);
+    VisitInsertionOrder();
+    EndTag("cfg");
+  }
+
+  void VisitBasicBlock(HBasicBlock* block) {
+    StartTag("block");
+    PrintProperty("name", "B", block->GetBlockId());
+    if (block->GetLifetimeStart() != kNoLifetime) {
+      // Piggyback on these fields to show the lifetime of the block.
+      PrintInt("from_bci", block->GetLifetimeStart());
+      PrintInt("to_bci", block->GetLifetimeEnd());
+    } else {
+      PrintInt("from_bci", -1);
+      PrintInt("to_bci", -1);
+    }
+    PrintPredecessors(block);
+    PrintSuccessors(block);
+    PrintEmptyProperty("xhandlers");
+    PrintEmptyProperty("flags");
+    if (block->GetDominator() != nullptr) {
+      PrintProperty("dominator", "B", block->GetDominator()->GetBlockId());
+    }
+
+    StartTag("states");
+    StartTag("locals");
+    PrintInt("size", 0);
+    PrintProperty("method", "None");
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      AddIndent();
+      HInstruction* instruction = it.Current();
+      output_ << instruction->GetId() << " v" << instruction->GetId() << "[ ";
+      for (HInputIterator inputs(instruction); !inputs.Done(); inputs.Advance()) {
+        output_ << inputs.Current()->GetId() << " ";
+      }
+      output_ << "]" << std::endl;
+    }
+    EndTag("locals");
+    EndTag("states");
+
+    StartTag("HIR");
+    PrintInstructions(block->GetPhis());
+    PrintInstructions(block->GetInstructions());
+    EndTag("HIR");
+    EndTag("block");
+  }
+
+ private:
+  std::ostream& output_;
+  size_t indent_;
+
+  DISALLOW_COPY_AND_ASSIGN(HGraphVisualizerPrinter);
+};
+
+HGraphVisualizer::HGraphVisualizer(std::ostream* output,
+                                   HGraph* graph,
+                                   const char* string_filter,
+                                   const DexCompilationUnit& cu)
+    : output_(output), graph_(graph), is_enabled_(false) {
+  if (output == nullptr) {
+    return;
+  }
+  std::string pretty_name = PrettyMethod(cu.GetDexMethodIndex(), *cu.GetDexFile());
+  if (pretty_name.find(string_filter) == std::string::npos) {
+    return;
+  }
+
+  is_enabled_ = true;
+  HGraphVisualizerPrinter printer(graph, *output_);
+  printer.StartTag("compilation");
+  printer.PrintProperty("name", pretty_name.c_str());
+  printer.PrintProperty("method", pretty_name.c_str());
+  printer.PrintTime("date");
+  printer.EndTag("compilation");
+}
+
+HGraphVisualizer::HGraphVisualizer(std::ostream* output,
+                                   HGraph* graph,
+                                   const char* name)
+    : output_(output), graph_(graph), is_enabled_(false) {
+  if (output == nullptr) {
+    return;
+  }
+
+  is_enabled_ = true;
+  HGraphVisualizerPrinter printer(graph, *output_);
+  printer.StartTag("compilation");
+  printer.PrintProperty("name", name);
+  printer.PrintProperty("method", name);
+  printer.PrintTime("date");
+  printer.EndTag("compilation");
+}
+
+void HGraphVisualizer::DumpGraph(const char* pass_name) {
+  if (!is_enabled_) {
+    return;
+  }
+  HGraphVisualizerPrinter printer(graph_, *output_);
+  printer.Run(pass_name);
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h
new file mode 100644
index 0000000..2b88e65
--- /dev/null
+++ b/compiler/optimizing/graph_visualizer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_GRAPH_VISUALIZER_H_
+#define ART_COMPILER_OPTIMIZING_GRAPH_VISUALIZER_H_
+
+#include "utils/allocation.h"
+
+namespace art {
+
+class DexCompilationUnit;
+class HGraph;
+
+/**
+ * If enabled, emits compilation information suitable for the c1visualizer tool
+ * and IRHydra.
+ * Currently only works if the compiler is single-threaded.
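+ *
+ * Example usage (an illustrative sketch; the output stream, graph and
+ * compilation unit are assumed to be provided by the caller):
+ *
+ *   std::ofstream out("art.cfg");
+ *   HGraphVisualizer visualizer(&out, graph, "", cu);  // empty filter matches all methods
+ *   visualizer.DumpGraph("ssa");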
+ */
+class HGraphVisualizer : public ValueObject {
+ public:
+  /**
+   * If output is not null, and the method name of the dex compilation
+   * unit contains `string_filter`, the compilation information will be
+   * emitted.
+   */
+  HGraphVisualizer(std::ostream* output,
+                   HGraph* graph,
+                   const char* string_filter,
+                   const DexCompilationUnit& cu);
+
+  /**
+   * Version of `HGraphVisualizer` for unit testing, that is, when a
+   * `DexCompilationUnit` is not available.
+   */
+  HGraphVisualizer(std::ostream* output, HGraph* graph, const char* name);
+
+  /**
+   * If this visualizer is enabled, emit the compilation information
+   * in `output_`.
+   */
+  void DumpGraph(const char* pass_name);
+
+ private:
+  std::ostream* const output_;
+  HGraph* const graph_;
+
+  // Is true when `output_` is not null, and the compiled method's name
+  // contains the `string_filter` given in the constructor.
+  bool is_enabled_;
+
+  DISALLOW_COPY_AND_ASSIGN(HGraphVisualizer);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_GRAPH_VISUALIZER_H_
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
new file mode 100644
index 0000000..f9ae529
--- /dev/null
+++ b/compiler/optimizing/linearize_test.cc
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fstream>
+
+#include "base/stringprintf.h"
+#include "builder.h"
+#include "dex_file.h"
+#include "dex_instruction.h"
+#include "graph_visualizer.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "pretty_printer.h"
+#include "ssa_builder.h"
+#include "ssa_liveness_analysis.h"
+#include "utils/arena_allocator.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+static void TestCode(const uint16_t* data, const int* expected_order, size_t number_of_blocks) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraphBuilder builder(&allocator);
+  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
+  HGraph* graph = builder.BuildGraph(*item);
+  ASSERT_NE(graph, nullptr);
+
+  graph->BuildDominatorTree();
+  graph->FindNaturalLoops();
+  SsaLivenessAnalysis liveness(*graph);
+  liveness.Analyze();
+
+  ASSERT_EQ(liveness.GetLinearPostOrder().Size(), number_of_blocks);
+  for (size_t i = 0; i < number_of_blocks; ++i) {
+    ASSERT_EQ(liveness.GetLinearPostOrder().Get(number_of_blocks - i - 1)->GetBlockId(),
+              expected_order[i]);
+  }
+}
+
+TEST(LinearizeTest, CFG1) {
+  // Structure of this graph (+ are back edges)
+  //            Block0
+  //              |
+  //            Block1
+  //              |
+  //            Block2 ++++++
+  //            /   \       +
+  //       Block5   Block7  +
+  //         |        |     +
+  //       Block6   Block3  +
+  //               + /   \  +
+  //           Block4   Block8
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 5,
+    Instruction::IF_EQ, 0xFFFE,
+    Instruction::GOTO | 0xFE00,
+    Instruction::RETURN_VOID);
+
+  const int blocks[] = {0, 1, 2, 7, 3, 4, 8, 5, 6};
+  TestCode(data, blocks, 9);
+}
+
+TEST(LinearizeTest, CFG2) {
+  // Structure of this graph (+ are back edges)
+  //            Block0
+  //              |
+  //            Block1
+  //              |
+  //            Block2 ++++++
+  //            /   \       +
+  //       Block3   Block7  +
+  //         |        |     +
+  //       Block6   Block4  +
+  //               + /   \  +
+  //           Block5   Block8
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::RETURN_VOID,
+    Instruction::IF_EQ, 0xFFFD,
+    Instruction::GOTO | 0xFE00);
+
+  const int blocks[] = {0, 1, 2, 7, 4, 5, 8, 3, 6};
+  TestCode(data, blocks, 9);
+}
+
+TEST(LinearizeTest, CFG3) {
+  // Structure of this graph (+ are back edges)
+  //            Block0
+  //              |
+  //            Block1
+  //              |
+  //            Block2 ++++++
+  //            /   \       +
+  //       Block3   Block8  +
+  //         |        |     +
+  //       Block7   Block5  +
+  //                 / +  \ +
+  //           Block6  + Block9
+  //             |     +
+  //           Block4 ++
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 4,
+    Instruction::RETURN_VOID,
+    Instruction::GOTO | 0x0100,
+    Instruction::IF_EQ, 0xFFFC,
+    Instruction::GOTO | 0xFD00);
+
+  const int blocks[] = {0, 1, 2, 8, 5, 6, 4, 9, 3, 7};
+  TestCode(data, blocks, 10);
+}
+
+TEST(LinearizeTest, CFG4) {
+  /* Structure of this graph (+ are back edges)
+  //            Block0
+  //              |
+  //            Block1
+  //              |
+  //            Block2
+  //            / +  \
+  //       Block6 + Block8
+  //         |    +   |
+  //       Block7 + Block3 +++++++
+  //              +  /  \        +
+  //           Block9   Block10  +
+  //                      |      +
+  //                    Block4   +
+  //                  + /    \   +
+  //                Block5  Block11
+  */
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 7,
+    Instruction::IF_EQ, 0xFFFE,
+    Instruction::IF_EQ, 0xFFFE,
+    Instruction::GOTO | 0xFE00,
+    Instruction::RETURN_VOID);
+
+  const int blocks[] = {0, 1, 2, 8, 3, 10, 4, 5, 11, 9, 6, 7};
+  TestCode(data, blocks, 12);
+}
+
+TEST(LinearizeTest, CFG5) {
+  /* Structure of this graph (+ are back edges)
+  //            Block0
+  //              |
+  //            Block1
+  //              |
+  //            Block2
+  //            / +  \
+  //       Block3 + Block8
+  //         |    +   |
+  //       Block7 + Block4 +++++++
+  //              +  /  \        +
+  //           Block9   Block10  +
+  //                      |      +
+  //                    Block5   +
+  //                   +/    \   +
+  //                Block6  Block11
+  */
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::RETURN_VOID,
+    Instruction::IF_EQ, 0xFFFD,
+    Instruction::IF_EQ, 0xFFFE,
+    Instruction::GOTO | 0xFE00);
+
+  const int blocks[] = {0, 1, 2, 8, 4, 10, 5, 6, 11, 9, 3, 7};
+  TestCode(data, blocks, 12);
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
new file mode 100644
index 0000000..9849388
--- /dev/null
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -0,0 +1,263 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "builder.h"
+#include "dex_file.h"
+#include "dex_instruction.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "ssa_liveness_analysis.h"
+#include "utils/arena_allocator.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) {
+  HGraphBuilder builder(allocator);
+  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
+  HGraph* graph = builder.BuildGraph(*item);
+  graph->BuildDominatorTree();
+  graph->TransformToSSA();
+  graph->FindNaturalLoops();
+  return graph;
+}
+
+TEST(LiveRangesTest, CFG1) {
+  /*
+   * Test the following snippet:
+   *  return 0;
+   *
+   * Which becomes the following graph (numbered by lifetime position):
+   *       2: constant0
+   *       3: goto
+   *           |
+   *       6: return
+   *           |
+   *       9: exit
+   */
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::RETURN);
+
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = BuildGraph(data, &allocator);
+  SsaLivenessAnalysis liveness(*graph);
+  liveness.Analyze();
+
+  LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
+  ASSERT_EQ(1u, interval->GetRanges().Size());
+  LiveRange range = interval->GetRanges().Get(0);
+  ASSERT_EQ(2u, range.GetStart());
+  // Last use is the return instruction.
+  ASSERT_EQ(6u, range.GetEnd());
+  HBasicBlock* block = graph->GetBlocks().Get(1);
+  ASSERT_TRUE(block->GetLastInstruction()->AsReturn() != nullptr);
+  ASSERT_EQ(6u, block->GetLastInstruction()->GetLifetimePosition());
+}
+
+TEST(LiveRangesTest, CFG2) {
+  /*
+   * Test the following snippet:
+   *  var a = 0;
+   *  if (0 == 0) {
+   *  } else {
+   *  }
+   *  return a;
+   *
+   * Which becomes the following graph (numbered by lifetime position):
+   *       2: constant0
+   *       3: goto
+   *           |
+   *       6: equal
+   *       7: if
+   *       /       \
+   *   10: goto   13: goto
+   *       \       /
+   *       16: return
+   *         |
+   *       19: exit
+   */
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0x100,
+    Instruction::RETURN | 0 << 8);
+
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = BuildGraph(data, &allocator);
+  SsaLivenessAnalysis liveness(*graph);
+  liveness.Analyze();
+
+  LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
+  ASSERT_EQ(1u, interval->GetRanges().Size());
+  LiveRange range = interval->GetRanges().Get(0);
+  ASSERT_EQ(2u, range.GetStart());
+  // Last use is the return instruction.
+  ASSERT_EQ(16u, range.GetEnd());
+  HBasicBlock* block = graph->GetBlocks().Get(3);
+  ASSERT_TRUE(block->GetLastInstruction()->AsReturn() != nullptr);
+  ASSERT_EQ(16u, block->GetLastInstruction()->GetLifetimePosition());
+}
+
+TEST(LiveRangesTest, CFG3) {
+  /*
+   * Test the following snippet:
+   *  var a = 0;
+   *  if (0 == 0) {
+   *  } else {
+   *    a = 4;
+   *  }
+   *  return a;
+   *
+   * Which becomes the following graph (numbered by lifetime position):
+   *       2: constant0
+   *       3: constant4
+   *       4: goto
+   *           |
+   *       7: equal
+   *       8: if
+   *       /       \
+   *   11: goto   14: goto
+   *       \       /
+   *       16: phi
+   *       17: return
+   *         |
+   *       20: exit
+   */
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::RETURN | 0 << 8);
+
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = BuildGraph(data, &allocator);
+  SsaLivenessAnalysis liveness(*graph);
+  liveness.Analyze();
+
+  // Test for the 0 constant.
+  LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
+  ASSERT_EQ(1u, interval->GetRanges().Size());
+  LiveRange range = interval->GetRanges().Get(0);
+  ASSERT_EQ(2u, range.GetStart());
+  // Last use is the phi at the return block, so the instruction is live until
+  // the end of the then block.
+  ASSERT_EQ(12u, range.GetEnd());
+
+  // Test for the 4 constant.
+  interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval();
+  // The then branch is a hole for this constant, so its interval has 2 ranges.
+  ASSERT_EQ(2u, interval->GetRanges().Size());
+  // First range is the else block.
+  range = interval->GetRanges().Get(0);
+  ASSERT_EQ(13u, range.GetStart());
+  // Last use is the phi at the return block.
+  ASSERT_EQ(15u, range.GetEnd());
+  // Second range starts from the definition and ends at the if block.
+  range = interval->GetRanges().Get(1);
+  ASSERT_EQ(3u, range.GetStart());
+  // 9 is the end of the if block.
+  ASSERT_EQ(9u, range.GetEnd());
+
+  // Test for the phi.
+  interval = liveness.GetInstructionFromSsaIndex(3)->GetLiveInterval();
+  ASSERT_EQ(1u, interval->GetRanges().Size());
+  range = interval->GetRanges().Get(0);
+  ASSERT_EQ(16u, range.GetStart());
+  ASSERT_EQ(17u, range.GetEnd());
+}
+
+TEST(LiveRangesTest, Loop) {
+  /*
+   * Test the following snippet:
+   *  var a = 0;
+   *  while (a == a) {
+   *    a = 4;
+   *  }
+   *  return 5;
+   *
+   * Which becomes the following graph (numbered by lifetime position):
+   *       2: constant0
+   *       3: constant4
+   *       4: constant5
+   *       5: goto
+   *           |
+   *       8: goto
+   *           |
+   *       10: phi
+   *       11: equal
+   *       12: if +++++
+   *        |       \ +
+   *        |     15: goto
+   *        |
+   *       18: return
+   *         |
+   *       21: exit
+   */
+
+  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 4,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::GOTO | 0xFD00,
+    Instruction::CONST_4 | 5 << 12 | 1 << 8,
+    Instruction::RETURN | 1 << 8);
+
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = BuildGraph(data, &allocator);
+  SsaLivenessAnalysis liveness(*graph);
+  liveness.Analyze();
+
+  // Test for the 0 constant.
+  LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
+  ASSERT_EQ(1u, interval->GetRanges().Size());
+  LiveRange range = interval->GetRanges().Get(0);
+  ASSERT_EQ(2u, range.GetStart());
+  // Last use is the loop phi, so the instruction is live until
+  // the end of the pre loop header.
+  ASSERT_EQ(9u, range.GetEnd());
+
+  // Test for the 4 constant.
+  interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval();
+  // The instruction is live until the end of the loop.
+  ASSERT_EQ(1u, interval->GetRanges().Size());
+  range = interval->GetRanges().Get(0);
+  ASSERT_EQ(3u, range.GetStart());
+  ASSERT_EQ(16u, range.GetEnd());
+
+  // Test for the 5 constant.
+  interval = liveness.GetInstructionFromSsaIndex(2)->GetLiveInterval();
+  // The instruction is live until the return instruction after the loop.
+  ASSERT_EQ(1u, interval->GetRanges().Size());
+  range = interval->GetRanges().Get(0);
+  ASSERT_EQ(4u, range.GetStart());
+  ASSERT_EQ(18u, range.GetEnd());
+
+  // Test for the phi.
+  interval = liveness.GetInstructionFromSsaIndex(3)->GetLiveInterval();
+  ASSERT_EQ(1u, interval->GetRanges().Size());
+  range = interval->GetRanges().Get(0);
+  // Instruction is consumed by the if.
+  ASSERT_EQ(10u, range.GetStart());
+  ASSERT_EQ(11u, range.GetEnd());
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
new file mode 100644
index 0000000..53e7bbe
--- /dev/null
+++ b/compiler/optimizing/liveness_test.cc
@@ -0,0 +1,529 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "builder.h"
+#include "dex_file.h"
+#include "dex_instruction.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "ssa_liveness_analysis.h"
+#include "utils/arena_allocator.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+static void TestCode(const uint16_t* data, const char* expected) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraphBuilder builder(&allocator);
+  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
+  HGraph* graph = builder.BuildGraph(*item);
+  ASSERT_NE(graph, nullptr);
+  graph->BuildDominatorTree();
+  graph->TransformToSSA();
+  graph->FindNaturalLoops();
+  SsaLivenessAnalysis liveness(*graph);
+  liveness.Analyze();
+
+  std::ostringstream buffer;
+  for (HInsertionOrderIterator it(*graph); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    buffer << "Block " << block->GetBlockId() << std::endl;
+    BitVector* live_in = liveness.GetLiveInSet(*block);
+    live_in->Dump(buffer, "  live in: ");
+    BitVector* live_out = liveness.GetLiveOutSet(*block);
+    live_out->Dump(buffer, "  live out: ");
+    BitVector* kill = liveness.GetKillSet(*block);
+    kill->Dump(buffer, "  kill: ");
+  }
+  ASSERT_STREQ(expected, buffer.str().c_str());
+}
+
+TEST(LivenessTest, CFG1) {
+  const char* expected =
+    "Block 0\n"
+    "  live in: ()\n"
+    "  live out: ()\n"
+    "  kill: ()\n"
+    "Block 1\n"
+    "  live in: ()\n"
+    "  live out: ()\n"
+    "  kill: ()\n"
+    "Block 2\n"
+    "  live in: ()\n"
+    "  live out: ()\n"
+    "  kill: ()\n";
+
+  // Constant is not used.
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::RETURN_VOID);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, CFG2) {
+  const char* expected =
+    "Block 0\n"
+    "  live in: (0)\n"
+    "  live out: (1)\n"
+    "  kill: (1)\n"
+    "Block 1\n"
+    "  live in: (1)\n"
+    "  live out: (0)\n"
+    "  kill: (0)\n"
+    "Block 2\n"
+    "  live in: (0)\n"
+    "  live out: (0)\n"
+    "  kill: (0)\n";
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::RETURN);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, CFG3) {
+  const char* expected =
+    "Block 0\n"  // entry block
+    "  live in: (000)\n"
+    "  live out: (110)\n"
+    "  kill: (110)\n"
+    "Block 1\n"  // block with add
+    "  live in: (110)\n"
+    "  live out: (001)\n"
+    "  kill: (001)\n"
+    "Block 2\n"  // block with return
+    "  live in: (001)\n"
+    "  live out: (000)\n"
+    "  kill: (000)\n"
+    "Block 3\n"  // exit block
+    "  live in: (000)\n"
+    "  live out: (000)\n"
+    "  kill: (000)\n";
+
+  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 3 << 12 | 0,
+    Instruction::CONST_4 | 4 << 12 | 1 << 8,
+    Instruction::ADD_INT_2ADDR | 1 << 12,
+    Instruction::GOTO | 0x100,
+    Instruction::RETURN);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, CFG4) {
+  // var a;
+  // if (0 == 0) {
+  //   a = 5;
+  // } else {
+  //   a = 4;
+  // }
+  // return a;
+  //
+  // Bitsets are made of:
+  // (constant0, constant4, constant5, phi, equal test)
+  const char* expected =
+    "Block 0\n"  // entry block
+    "  live in: (00000)\n"
+    "  live out: (11100)\n"
+    "  kill: (11100)\n"
+    "Block 1\n"  // block with if
+    "  live in: (11100)\n"
+    "  live out: (01100)\n"
+    "  kill: (00010)\n"
+    "Block 2\n"  // else block
+    "  live in: (01000)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n"
+    "Block 3\n"  // then block
+    "  live in: (00100)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n"
+    "Block 4\n"  // return block
+    "  live in: (00000)\n"
+    "  live out: (00000)\n"
+    "  kill: (00001)\n"
+    "Block 5\n"  // exit block
+    "  live in: (00000)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n";
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 4,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::GOTO | 0x200,
+    Instruction::CONST_4 | 5 << 12 | 0,
+    Instruction::RETURN | 0 << 8);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, CFG5) {
+  // var a = 0;
+  // if (0 == 0) {
+  // } else {
+  //   a = 4;
+  // }
+  // return a;
+  const char* expected =
+    "Block 0\n"  // entry block
+    "  live in: (0000)\n"
+    "  live out: (1100)\n"
+    "  kill: (1100)\n"
+    "Block 1\n"  // block with if
+    "  live in: (1100)\n"
+    "  live out: (1100)\n"
+    "  kill: (0010)\n"
+    "Block 2\n"  // else block
+    "  live in: (0100)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n"
+    "Block 3\n"  // return block
+    "  live in: (0000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0001)\n"
+    "Block 4\n"  // exit block
+    "  live in: (0000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n"
+    "Block 5\n"  // block to avoid critical edge. Predecessor is 1, successor is 3.
+    "  live in: (1000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n";
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::RETURN | 0 << 8);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, Loop1) {
+  // Simple loop with one preheader and one back edge.
+  // var a = 0;
+  // while (a == a) {
+  //   a = 4;
+  // }
+  // return;
+  const char* expected =
+    "Block 0\n"  // entry block
+    "  live in: (0000)\n"
+    "  live out: (1100)\n"
+    "  kill: (1100)\n"
+    "Block 1\n"  // pre header
+    "  live in: (1100)\n"
+    "  live out: (0100)\n"
+    "  kill: (0000)\n"
+    "Block 2\n"  // loop header
+    "  live in: (0100)\n"
+    "  live out: (0100)\n"
+    "  kill: (0011)\n"
+    "Block 3\n"  // back edge
+    "  live in: (0100)\n"
+    "  live out: (0100)\n"
+    "  kill: (0000)\n"
+    "Block 4\n"  // return block
+    "  live in: (0000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n"
+    "Block 5\n"  // exit block
+    "  live in: (0000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n";
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 4,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::GOTO | 0xFD00,
+    Instruction::RETURN_VOID);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, Loop3) {
+  // Test that the returned value stays live in a preceding loop.
+  // var a = 0;
+  // while (a == a) {
+  //   a = 4;
+  // }
+  // return 5;
+  const char* expected =
+    "Block 0\n"
+    "  live in: (00000)\n"
+    "  live out: (11100)\n"
+    "  kill: (11100)\n"
+    "Block 1\n"
+    "  live in: (11100)\n"
+    "  live out: (01100)\n"
+    "  kill: (00000)\n"
+    "Block 2\n"  // loop header
+    "  live in: (01100)\n"
+    "  live out: (01100)\n"
+    "  kill: (00011)\n"
+    "Block 3\n"  // back edge
+    "  live in: (01100)\n"
+    "  live out: (01100)\n"
+    "  kill: (00000)\n"
+    "Block 4\n"  // return block
+    "  live in: (00100)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n"
+    "Block 5\n"  // exit block
+    "  live in: (00000)\n"
+    "  live out: (00000)\n"
+    "  kill: (00000)\n";
+
+  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 4,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::GOTO | 0xFD00,
+    Instruction::CONST_4 | 5 << 12 | 1 << 8,
+    Instruction::RETURN | 1 << 8);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, Loop4) {
+  // Make sure we support a loop preheader that is not the first predecessor
+  // in the predecessor list of the header.
+  // var a = 0;
+  // while (a == a) {
+  //   a = 4;
+  // }
+  // return a;
+  // Bitsets are made of:
+  // (constant0, constant4, phi, equal test)
+  const char* expected =
+    "Block 0\n"
+    "  live in: (0000)\n"
+    "  live out: (1100)\n"
+    "  kill: (1100)\n"
+    "Block 1\n"
+    "  live in: (1100)\n"
+    "  live out: (1100)\n"
+    "  kill: (0000)\n"
+    "Block 2\n"  // loop header
+    "  live in: (0100)\n"
+    "  live out: (0110)\n"
+    "  kill: (0011)\n"
+    "Block 3\n"  // back edge
+    "  live in: (0100)\n"
+    "  live out: (0100)\n"
+    "  kill: (0000)\n"
+    "Block 4\n"  // pre loop header
+    "  live in: (1100)\n"
+    "  live out: (0100)\n"
+    "  kill: (0000)\n"
+    "Block 5\n"  // return block
+    "  live in: (0010)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n"
+    "Block 6\n"  // exit block
+    "  live in: (0000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n";
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::GOTO | 0x500,
+    Instruction::IF_EQ, 5,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::GOTO | 0xFD00,
+    Instruction::GOTO | 0xFC00,
+    Instruction::RETURN | 0 << 8);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, Loop5) {
+  // Make sure we create a preheader of a loop when a header originally has two
+  // incoming blocks and one back edge.
+  // Bitsets are made of:
+  // (constant0, constant4, constant5, equal in block 1, phi in block 8, phi in block 4,
+  //  equal in block 4)
+  const char* expected =
+    "Block 0\n"
+    "  live in: (0000000)\n"
+    "  live out: (1110000)\n"
+    "  kill: (1110000)\n"
+    "Block 1\n"
+    "  live in: (1110000)\n"
+    "  live out: (0110000)\n"
+    "  kill: (0001000)\n"
+    "Block 2\n"
+    "  live in: (0100000)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000000)\n"
+    "Block 3\n"
+    "  live in: (0010000)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000000)\n"
+    "Block 4\n"  // loop header
+    "  live in: (0000000)\n"
+    "  live out: (0000010)\n"
+    "  kill: (0000011)\n"
+    "Block 5\n"  // back edge
+    "  live in: (0000010)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000000)\n"
+    "Block 6\n"  // return block
+    "  live in: (0000010)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000000)\n"
+    "Block 7\n"  // exit block
+    "  live in: (0000000)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000000)\n"
+    "Block 8\n"  // synthesized pre header
+    "  live in: (0000000)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000100)\n";
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 4,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::GOTO | 0x200,
+    Instruction::CONST_4 | 5 << 12 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::GOTO | 0xFE00,
+    Instruction::RETURN | 0 << 8);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, Loop6) {
+  // Bitsets are made of:
+  // (constant0, constant4, constant5, phi in block 2, equal in block 2, equal in block 3,
+  //  phi in block 8)
+  const char* expected =
+    "Block 0\n"
+    "  live in: (0000000)\n"
+    "  live out: (1110000)\n"
+    "  kill: (1110000)\n"
+    "Block 1\n"
+    "  live in: (1110000)\n"
+    "  live out: (0110000)\n"
+    "  kill: (0000000)\n"
+    "Block 2\n"  // loop header
+    "  live in: (0110000)\n"
+    "  live out: (0111000)\n"
+    "  kill: (0001100)\n"
+    "Block 3\n"
+    "  live in: (0110000)\n"
+    "  live out: (0110000)\n"
+    "  kill: (0000010)\n"
+    "Block 4\n"  // original back edge
+    "  live in: (0110000)\n"
+    "  live out: (0110000)\n"
+    "  kill: (0000000)\n"
+    "Block 5\n"  // original back edge
+    "  live in: (0110000)\n"
+    "  live out: (0110000)\n"
+    "  kill: (0000000)\n"
+    "Block 6\n"  // return block
+    "  live in: (0001000)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000000)\n"
+    "Block 7\n"  // exit block
+    "  live in: (0000000)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000000)\n"
+    "Block 8\n"  // synthesized back edge
+    "  live in: (0110000)\n"
+    "  live out: (0110000)\n"
+    "  kill: (0000001)\n";
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 8,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::IF_EQ, 4,
+    Instruction::CONST_4 | 5 << 12 | 0,
+    Instruction::GOTO | 0xFA00,
+    Instruction::GOTO | 0xF900,
+    Instruction::RETURN | 0 << 8);
+
+  TestCode(data, expected);
+}
+
+TEST(LivenessTest, Loop7) {
+  // Bitsets are made of:
+  // (constant0, constant4, constant5, phi in block 2, equal in block 2, equal in block 3,
+  //  phi in block 6)
+  const char* expected =
+    "Block 0\n"
+    "  live in: (0000000)\n"
+    "  live out: (1110000)\n"
+    "  kill: (1110000)\n"
+    "Block 1\n"
+    "  live in: (1110000)\n"
+    "  live out: (0110000)\n"
+    "  kill: (0000000)\n"
+    "Block 2\n"  // loop header
+    "  live in: (0110000)\n"
+    "  live out: (0111000)\n"
+    "  kill: (0001100)\n"
+    "Block 3\n"
+    "  live in: (0110000)\n"
+    "  live out: (0110000)\n"
+    "  kill: (0000010)\n"
+    "Block 4\n"  // loop exit
+    "  live in: (0010000)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000000)\n"
+    "Block 5\n"  // back edge
+    "  live in: (0110000)\n"
+    "  live out: (0110000)\n"
+    "  kill: (0000000)\n"
+    "Block 6\n"  // return block
+    "  live in: (0000000)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000001)\n"
+    "Block 7\n"  // exit block
+    "  live in: (0000000)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000000)\n"
+    "Block 8\n"  // synthesized block to avoid critical edge.
+    "  live in: (0001000)\n"
+    "  live out: (0000000)\n"
+    "  kill: (0000000)\n";
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 8,
+    Instruction::CONST_4 | 4 << 12 | 0,
+    Instruction::IF_EQ, 4,
+    Instruction::CONST_4 | 5 << 12 | 0,
+    Instruction::GOTO | 0x0200,
+    Instruction::GOTO | 0xF900,
+    Instruction::RETURN | 0 << 8);
+
+  TestCode(data, expected);
+}
+
+}  // namespace art
diff --git a/compiler/dex/bit_vector_block_iterator.cc b/compiler/optimizing/locations.cc
similarity index 61%
copy from compiler/dex/bit_vector_block_iterator.cc
copy to compiler/optimizing/locations.cc
index 32d7d71..98766d2 100644
--- a/compiler/dex/bit_vector_block_iterator.cc
+++ b/compiler/optimizing/locations.cc
@@ -1,4 +1,4 @@
-/*
+/*
  * Copyright (C) 2014 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,19 +14,19 @@
  * limitations under the License.
  */
 
-#include "bit_vector_block_iterator.h"
-#include "mir_graph.h"
+#include "locations.h"
+
+#include "nodes.h"
 
 namespace art {
 
-BasicBlock* BitVectorBlockIterator::Next() {
-  int idx = internal_iterator_.Next();
-
-  if (idx == -1) {
-    return nullptr;
+LocationSummary::LocationSummary(HInstruction* instruction)
+    : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()),
+      temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0) {
+  inputs_.SetSize(instruction->InputCount());
+  for (size_t i = 0; i < instruction->InputCount(); i++) {
+    inputs_.Put(i, Location());
   }
-
-  return mir_graph_->GetBasicBlock(idx);
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
new file mode 100644
index 0000000..3c60d3c
--- /dev/null
+++ b/compiler/optimizing/locations.h
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_LOCATIONS_H_
+#define ART_COMPILER_OPTIMIZING_LOCATIONS_H_
+
+#include "base/bit_field.h"
+#include "utils/allocation.h"
+#include "utils/growable_array.h"
+#include "utils/managed_register.h"
+
+namespace art {
+
+class HInstruction;
+
+/**
+ * A Location is an abstraction over the potential location
+ * of an instruction. It could be in a register or on the stack.
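+ *
+ * A few illustrative values (a sketch; the factory methods are defined below):
+ *
+ *   Location reg  = Location::RequiresRegister();  // unallocated, resolved by the allocator
+ *   Location slot = Location::StackSlot(4);        // word-sized slot at stack index 4
+ *   Location none = Location::NoLocation();        // explicitly ignored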
+ */
+class Location : public ValueObject {
+ public:
+  enum Kind {
+    kInvalid = 0,
+    kStackSlot = 1,  // Word-sized slot.
+    kDoubleStackSlot = 2,  // 64-bit stack slot.
+    kRegister = 3,
+    // On 32-bit architectures, Quick can pass a long where the
+    // low bits are in the last parameter register and the high
+    // bits are in a stack slot. The kQuickParameter kind is for
+    // handling this special case.
+    kQuickParameter = 4,
+
+    // An unallocated location represents a location that is not fixed and can be
+    // allocated by a register allocator. Each unallocated location has
+    // a policy that specifies what kind of location is suitable. The payload
+    // contains the register allocation policy.
+    kUnallocated = 5,
+  };
+
+  Location() : value_(kInvalid) {
+    DCHECK(!IsValid());
+  }
+
+  Location(const Location& other) : ValueObject(), value_(other.value_) {}
+
+  Location& operator=(const Location& other) {
+    value_ = other.value_;
+    return *this;
+  }
+
+  bool IsValid() const {
+    return value_ != kInvalid;
+  }
+
+  bool IsInvalid() const {
+    return !IsValid();
+  }
+
+  bool IsConstant() const {
+    // TODO: support constants.
+    return false;
+  }
+
+  // Empty location. Used if the location should be ignored.
+  static Location NoLocation() {
+    return Location();
+  }
+
+  // Register locations.
+  static Location RegisterLocation(ManagedRegister reg) {
+    return Location(kRegister, reg.RegId());
+  }
+
+  bool IsRegister() const {
+    return GetKind() == kRegister;
+  }
+
+  ManagedRegister reg() const {
+    DCHECK(IsRegister());
+    return static_cast<ManagedRegister>(GetPayload());
+  }
+
+  static uword EncodeStackIndex(intptr_t stack_index) {
+    DCHECK(-kStackIndexBias <= stack_index);
+    DCHECK(stack_index < kStackIndexBias);
+    return static_cast<uword>(kStackIndexBias + stack_index);
+  }
+
+  static Location StackSlot(intptr_t stack_index) {
+    uword payload = EncodeStackIndex(stack_index);
+    Location loc(kStackSlot, payload);
+    // Ensure that sign is preserved.
+    DCHECK_EQ(loc.GetStackIndex(), stack_index);
+    return loc;
+  }
+
+  bool IsStackSlot() const {
+    return GetKind() == kStackSlot;
+  }
+
+  static Location DoubleStackSlot(intptr_t stack_index) {
+    uword payload = EncodeStackIndex(stack_index);
+    Location loc(kDoubleStackSlot, payload);
+    // Ensure that sign is preserved.
+    DCHECK_EQ(loc.GetStackIndex(), stack_index);
+    return loc;
+  }
+
+  bool IsDoubleStackSlot() const {
+    return GetKind() == kDoubleStackSlot;
+  }
+
+  intptr_t GetStackIndex() const {
+    DCHECK(IsStackSlot() || IsDoubleStackSlot());
+    // Decode stack index manually to preserve sign.
+    return GetPayload() - kStackIndexBias;
+  }
+
+  intptr_t GetHighStackIndex(uintptr_t word_size) const {
+    DCHECK(IsDoubleStackSlot());
+    // Decode stack index manually to preserve sign.
+    return GetPayload() - kStackIndexBias + word_size;
+  }
+
+  static Location QuickParameter(uint32_t parameter_index) {
+    return Location(kQuickParameter, parameter_index);
+  }
+
+  uint32_t GetQuickParameterIndex() const {
+    DCHECK(IsQuickParameter());
+    return GetPayload();
+  }
+
+  bool IsQuickParameter() const {
+    return GetKind() == kQuickParameter;
+  }
+
+  arm::ArmManagedRegister AsArm() const;
+  x86::X86ManagedRegister AsX86() const;
+
+  Kind GetKind() const {
+    return KindField::Decode(value_);
+  }
+
+  bool Equals(Location other) const {
+    return value_ == other.value_;
+  }
+
+  const char* DebugString() const {
+    switch (GetKind()) {
+      case kInvalid: return "?";
+      case kRegister: return "R";
+      case kStackSlot: return "S";
+      case kDoubleStackSlot: return "DS";
+      case kQuickParameter: return "Q";
+      case kUnallocated: return "U";
+    }
+    return "?";
+  }
+
+  // Unallocated locations.
+  enum Policy {
+    kAny,
+    kRequiresRegister,
+    kSameAsFirstInput,
+  };
+
+  bool IsUnallocated() const {
+    return GetKind() == kUnallocated;
+  }
+
+  static Location UnallocatedLocation(Policy policy) {
+    return Location(kUnallocated, PolicyField::Encode(policy));
+  }
+
+  // Any free register is suitable to replace this unallocated location.
+  static Location Any() {
+    return UnallocatedLocation(kAny);
+  }
+
+  static Location RequiresRegister() {
+    return UnallocatedLocation(kRequiresRegister);
+  }
+
+  // The location of the first input to the instruction will be
+  // used to replace this unallocated location.
+  static Location SameAsFirstInput() {
+    return UnallocatedLocation(kSameAsFirstInput);
+  }
+
+  Policy GetPolicy() const {
+    DCHECK(IsUnallocated());
+    return PolicyField::Decode(GetPayload());
+  }
+
+  uword GetEncoding() const {
+    return GetPayload();
+  }
+
+ private:
+  // Number of bits required to encode Kind value.
+  static constexpr uint32_t kBitsForKind = 4;
+  static constexpr uint32_t kBitsForPayload = kWordSize * kBitsPerByte - kBitsForKind;
+
+  explicit Location(uword value) : value_(value) {}
+
+  Location(Kind kind, uword payload)
+      : value_(KindField::Encode(kind) | PayloadField::Encode(payload)) {}
+
+  uword GetPayload() const {
+    return PayloadField::Decode(value_);
+  }
+
+  typedef BitField<Kind, 0, kBitsForKind> KindField;
+  typedef BitField<uword, kBitsForKind, kBitsForPayload> PayloadField;
+
+  // Layout for kUnallocated locations payload.
+  typedef BitField<Policy, 0, 3> PolicyField;
+
+  // Layout for stack slots.
+  static const intptr_t kStackIndexBias =
+      static_cast<intptr_t>(1) << (kBitsForPayload - 1);
+
+  // A Location either contains kind and payload fields or a tagged handle for
+  // a constant location. Values of the Kind enumeration are selected in such a
+  // way that none of them can be interpreted as a kConstant tag.
+  uword value_;
+};
+
+/**
+ * The code generator computes LocationSummary for each instruction so that
+ * the instruction itself knows what code to generate: where to find the inputs
+ * and where to place the result.
+ *
+ * The intent is to have the code for generating the instruction independent of
+ * register allocation. A register allocator just has to provide a LocationSummary.
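+ *
+ * Illustrative sketch of filling in a summary (the arena pointer and the
+ * policies chosen here are placeholders):
+ *
+ *   LocationSummary* locations = new (arena) LocationSummary(instruction);
+ *   locations->SetInAt(0, Location::RequiresRegister());
+ *   locations->SetOut(Location::SameAsFirstInput());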
+ */
+class LocationSummary : public ArenaObject {
+ public:
+  explicit LocationSummary(HInstruction* instruction);
+
+  void SetInAt(uint32_t at, Location location) {
+    inputs_.Put(at, location);
+  }
+
+  Location InAt(uint32_t at) const {
+    return inputs_.Get(at);
+  }
+
+  size_t GetInputCount() const {
+    return inputs_.Size();
+  }
+
+  void SetOut(Location location) {
+    output_ = Location(location);
+  }
+
+  void AddTemp(Location location) {
+    temps_.Add(location);
+  }
+
+  Location GetTemp(uint32_t at) const {
+    return temps_.Get(at);
+  }
+
+  void SetTempAt(uint32_t at, Location location) {
+    temps_.Put(at, location);
+  }
+
+  size_t GetTempCount() const {
+    return temps_.Size();
+  }
+
+  Location Out() const { return output_; }
+
+ private:
+  GrowableArray<Location> inputs_;
+  GrowableArray<Location> temps_;
+  Location output_;
+
+  DISALLOW_COPY_AND_ASSIGN(LocationSummary);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_LOCATIONS_H_
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 3d6aeb7..74ba520 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -25,22 +25,22 @@
   blocks_.Add(block);
 }
 
-void HGraph::FindBackEdges(ArenaBitVector* visited) const {
+void HGraph::FindBackEdges(ArenaBitVector* visited) {
   ArenaBitVector visiting(arena_, blocks_.Size(), false);
   VisitBlockForBackEdges(entry_block_, visited, &visiting);
 }
 
 void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) const {
-  for (size_t i = 0; i < blocks_.Size(); i++) {
+  for (size_t i = 0; i < blocks_.Size(); ++i) {
     if (!visited.IsBitSet(i)) {
       HBasicBlock* block = blocks_.Get(i);
-      for (size_t j = 0; j < block->GetSuccessors()->Size(); j++) {
-        block->GetSuccessors()->Get(j)->RemovePredecessor(block, false);
+      for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) {
+        block->GetSuccessors().Get(j)->RemovePredecessor(block, false);
       }
-      for (HInstructionIterator it(*block->GetPhis()); !it.Done(); it.Advance()) {
+      for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
         block->RemovePhi(it.Current()->AsPhi());
       }
-      for (HInstructionIterator it(*block->GetInstructions()); !it.Done(); it.Advance()) {
+      for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
         block->RemoveInstruction(it.Current());
       }
     }
@@ -49,14 +49,14 @@
 
 void HGraph::VisitBlockForBackEdges(HBasicBlock* block,
                                     ArenaBitVector* visited,
-                                    ArenaBitVector* visiting) const {
+                                    ArenaBitVector* visiting) {
   int id = block->GetBlockId();
   if (visited->IsBitSet(id)) return;
 
   visited->SetBit(id);
   visiting->SetBit(id);
-  for (size_t i = 0; i < block->GetSuccessors()->Size(); i++) {
-    HBasicBlock* successor = block->GetSuccessors()->Get(i);
+  for (size_t i = 0; i < block->GetSuccessors().Size(); i++) {
+    HBasicBlock* successor = block->GetSuccessors().Get(i);
     if (visiting->IsBitSet(successor->GetBlockId())) {
       successor->AddBackEdge(block);
     } else {
@@ -77,14 +77,18 @@
   //     predecessors list of live blocks.
   RemoveDeadBlocks(visited);
 
-  // (3) Compute the immediate dominator of each block. We visit
+  // (3) Simplify the CFG now, so that we don't need to recompute
+  //     dominators and the reverse post order.
+  SimplifyCFG();
+
+  // (4) Compute the immediate dominator of each block. We visit
   //     the successors of a block only when all its forward branches
   //     have been processed.
   GrowableArray<size_t> visits(arena_, blocks_.Size());
   visits.SetSize(blocks_.Size());
-  dominator_order_.Add(entry_block_);
-  for (size_t i = 0; i < entry_block_->GetSuccessors()->Size(); i++) {
-    VisitBlockForDominatorTree(entry_block_->GetSuccessors()->Get(i), entry_block_, &visits);
+  reverse_post_order_.Add(entry_block_);
+  for (size_t i = 0; i < entry_block_->GetSuccessors().Size(); i++) {
+    VisitBlockForDominatorTree(entry_block_->GetSuccessors().Get(i), entry_block_, &visits);
   }
 }
 
@@ -119,61 +123,183 @@
   // Once all the forward edges have been visited, we know the immediate
   // dominator of the block. We can then start visiting its successors.
   if (visits->Get(block->GetBlockId()) ==
-      block->GetPredecessors()->Size() - block->NumberOfBackEdges()) {
-    dominator_order_.Add(block);
-    for (size_t i = 0; i < block->GetSuccessors()->Size(); i++) {
-      VisitBlockForDominatorTree(block->GetSuccessors()->Get(i), block, visits);
+      block->GetPredecessors().Size() - block->NumberOfBackEdges()) {
+    reverse_post_order_.Add(block);
+    for (size_t i = 0; i < block->GetSuccessors().Size(); i++) {
+      VisitBlockForDominatorTree(block->GetSuccessors().Get(i), block, visits);
     }
   }
 }
 
 void HGraph::TransformToSSA() {
-  DCHECK(!dominator_order_.IsEmpty());
-  SimplifyCFG();
+  DCHECK(!reverse_post_order_.IsEmpty());
   SsaBuilder ssa_builder(this);
   ssa_builder.BuildSsa();
 }
 
-void HGraph::SimplifyCFG() {
-  for (size_t i = 0; i < dominator_order_.Size(); i++) {
-    HBasicBlock* current = dominator_order_.Get(i);
-    if (current->IsLoopHeader()) {
-      // Make sure the loop has only one pre header. This simplifies SSA building by having
-      // to just look at the pre header to know which locals are initialized at entry of the
-      // loop.
-      HLoopInformation* info = current->GetLoopInformation();
-      size_t number_of_incomings = current->GetPredecessors()->Size() - info->NumberOfBackEdges();
-      if (number_of_incomings != 1) {
-        HBasicBlock* pre_header = new (arena_) HBasicBlock(this);
-        AddBlock(pre_header);
-        pre_header->AddInstruction(new (arena_) HGoto());
-        pre_header->SetDominator(current->GetDominator());
-        current->SetDominator(pre_header);
-        dominator_order_.InsertAt(i, pre_header);
-        i++;
-
-        ArenaBitVector back_edges(arena_, GetBlocks()->Size(), false);
-        for (size_t pred = 0; pred < info->GetBackEdges()->Size(); pred++) {
-          back_edges.SetBit(info->GetBackEdges()->Get(pred)->GetBlockId());
-        }
-        for (size_t pred = 0; pred < current->GetPredecessors()->Size(); pred++) {
-          HBasicBlock* predecessor = current->GetPredecessors()->Get(pred);
-          if (!back_edges.IsBitSet(predecessor->GetBlockId())) {
-            current->RemovePredecessor(predecessor);
-            pred--;
-            predecessor->AddSuccessor(pre_header);
-          }
-        }
-        pre_header->AddSuccessor(current);
-      }
-      info->SetPreHeader(current->GetDominator());
+void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) {
+  // Insert a new node between `block` and `successor` to split the
+  // critical edge.
+  HBasicBlock* new_block = new (arena_) HBasicBlock(this);
+  AddBlock(new_block);
+  new_block->AddInstruction(new (arena_) HGoto());
+  block->RemoveSuccessor(successor);
+  block->AddSuccessor(new_block);
+  new_block->AddSuccessor(successor);
+  if (successor->IsLoopHeader()) {
+    // If we split at a back edge boundary, make the new block the back edge.
+    HLoopInformation* info = successor->GetLoopInformation();
+    if (info->IsBackEdge(block)) {
+      info->RemoveBackEdge(block);
+      info->AddBackEdge(new_block);
     }
   }
 }
 
-void HLoopInformation::SetPreHeader(HBasicBlock* block) {
-  DCHECK_EQ(header_->GetDominator(), block);
-  pre_header_ = block;
+void HGraph::SimplifyLoop(HBasicBlock* header) {
+  HLoopInformation* info = header->GetLoopInformation();
+
+  // If there is more than one back edge, make them all branch to the same block that
+  // will become the only back edge. This simplifies finding natural loops in the
+  // graph.
+  if (info->NumberOfBackEdges() > 1) {
+    HBasicBlock* new_back_edge = new (arena_) HBasicBlock(this);
+    AddBlock(new_back_edge);
+    new_back_edge->AddInstruction(new (arena_) HGoto());
+    for (size_t pred = 0, e = info->GetBackEdges().Size(); pred < e; ++pred) {
+      HBasicBlock* back_edge = info->GetBackEdges().Get(pred);
+      header->RemovePredecessor(back_edge);
+      back_edge->AddSuccessor(new_back_edge);
+    }
+    info->ClearBackEdges();
+    info->AddBackEdge(new_back_edge);
+    new_back_edge->AddSuccessor(header);
+  }
+
+  // Make sure the loop has only one pre header. This simplifies SSA building: we only
+  // need to look at the pre header to know which locals are initialized at entry of
+  // the loop.
+  size_t number_of_incomings = header->GetPredecessors().Size() - info->NumberOfBackEdges();
+  if (number_of_incomings != 1) {
+    HBasicBlock* pre_header = new (arena_) HBasicBlock(this);
+    AddBlock(pre_header);
+    pre_header->AddInstruction(new (arena_) HGoto());
+
+    ArenaBitVector back_edges(arena_, GetBlocks().Size(), false);
+    HBasicBlock* back_edge = info->GetBackEdges().Get(0);
+    for (size_t pred = 0; pred < header->GetPredecessors().Size(); ++pred) {
+      HBasicBlock* predecessor = header->GetPredecessors().Get(pred);
+      if (predecessor != back_edge) {
+        header->RemovePredecessor(predecessor);
+        pred--;
+        predecessor->AddSuccessor(pre_header);
+      }
+    }
+    pre_header->AddSuccessor(header);
+  }
+}
+
+void HGraph::SimplifyCFG() {
+  // Simplify the CFG for future analysis and code generation:
+  // (1): Split critical edges.
+  // (2): Simplify loops so that they have a single back edge and a single pre header.
+  for (size_t i = 0; i < blocks_.Size(); ++i) {
+    HBasicBlock* block = blocks_.Get(i);
+    if (block->GetSuccessors().Size() > 1) {
+      for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) {
+        HBasicBlock* successor = block->GetSuccessors().Get(j);
+        if (successor->GetPredecessors().Size() > 1) {
+          SplitCriticalEdge(block, successor);
+          --j;
+        }
+      }
+    }
+    if (block->IsLoopHeader()) {
+      SimplifyLoop(block);
+    }
+  }
+}
+
+bool HGraph::FindNaturalLoops() const {
+  for (size_t i = 0; i < blocks_.Size(); ++i) {
+    HBasicBlock* block = blocks_.Get(i);
+    if (block->IsLoopHeader()) {
+      HLoopInformation* info = block->GetLoopInformation();
+      if (!info->Populate()) {
+        // Abort if the loop is non-natural. We currently bail out in such cases.
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+void HLoopInformation::PopulateRecursive(HBasicBlock* block) {
+  if (blocks_.IsBitSet(block->GetBlockId())) {
+    return;
+  }
+
+  blocks_.SetBit(block->GetBlockId());
+  block->SetInLoop(this);
+  for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) {
+    PopulateRecursive(block->GetPredecessors().Get(i));
+  }
+}
+
+bool HLoopInformation::Populate() {
+  DCHECK_EQ(GetBackEdges().Size(), 1u);
+  HBasicBlock* back_edge = GetBackEdges().Get(0);
+  DCHECK(back_edge->GetDominator() != nullptr);
+  if (!header_->Dominates(back_edge)) {
+    // This loop is not natural. Do not bother going further.
+    return false;
+  }
+
+  // Populate this loop: starting with the back edge, recursively add predecessors
+  // that are not already part of that loop. Set the header as part of the loop
+  // to end the recursion.
+  // This is a recursive implementation of the algorithm described in
+  // "Advanced Compiler Design & Implementation" (Muchnick) p192.
+  blocks_.SetBit(header_->GetBlockId());
+  PopulateRecursive(back_edge);
+  return true;
+}
+
+HBasicBlock* HLoopInformation::GetPreHeader() const {
+  DCHECK_EQ(header_->GetPredecessors().Size(), 2u);
+  return header_->GetDominator();
+}
+
+bool HLoopInformation::Contains(const HBasicBlock& block) const {
+  return blocks_.IsBitSet(block.GetBlockId());
+}
+
+bool HLoopInformation::IsIn(const HLoopInformation& other) const {
+  return other.blocks_.IsBitSet(header_->GetBlockId());
+}
+
+bool HBasicBlock::Dominates(HBasicBlock* other) const {
+  // Walk up the dominator tree from `other`, to find out if `this`
+  // is an ancestor.
+  HBasicBlock* current = other;
+  while (current != nullptr) {
+    if (current == this) {
+      return true;
+    }
+    current = current->GetDominator();
+  }
+  return false;
+}
+
+void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) {
+  DCHECK(cursor->AsPhi() == nullptr);
+  DCHECK(instruction->AsPhi() == nullptr);
+  instruction->next_ = cursor;
+  instruction->previous_ = cursor->previous_;
+  cursor->previous_ = instruction;
+  if (GetFirstInstruction() == cursor) {
+    instructions_.first_instruction_ = instruction;
+  }
 }
 
 static void Add(HInstructionList* instruction_list,
@@ -298,17 +424,17 @@
 #undef DEFINE_ACCEPT
 
 void HGraphVisitor::VisitInsertionOrder() {
-  const GrowableArray<HBasicBlock*>* blocks = graph_->GetBlocks();
-  for (size_t i = 0 ; i < blocks->Size(); i++) {
-    VisitBasicBlock(blocks->Get(i));
+  const GrowableArray<HBasicBlock*>& blocks = graph_->GetBlocks();
+  for (size_t i = 0 ; i < blocks.Size(); i++) {
+    VisitBasicBlock(blocks.Get(i));
   }
 }
 
 void HGraphVisitor::VisitBasicBlock(HBasicBlock* block) {
-  for (HInstructionIterator it(*block->GetPhis()); !it.Done(); it.Advance()) {
+  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
     it.Current()->Accept(this);
   }
-  for (HInstructionIterator it(*block->GetInstructions()); !it.Done(); it.Advance()) {
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
     it.Current()->Accept(this);
   }
 }
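The Populate()/PopulateRecursive() pair added above is the standard natural-loop membership walk referenced from Muchnick: seed the bit vector with the header, then flood backwards through predecessors starting at the single back edge, stopping at blocks that are already marked. Below is a minimal standalone sketch of the same walk over a toy CFG; the names and types are purely illustrative and are not part of this patch or of the ART data structures.

  #include <cstddef>
  #include <vector>

  // preds[b] lists the predecessor block ids of block b.
  static void PopulateLoop(const std::vector<std::vector<size_t>>& preds,
                           size_t header,
                           size_t back_edge,
                           std::vector<bool>* in_loop) {
    (*in_loop)[header] = true;             // Seeding the header stops the walk.
    std::vector<size_t> worklist(1, back_edge);
    while (!worklist.empty()) {
      size_t block = worklist.back();
      worklist.pop_back();
      if ((*in_loop)[block]) {
        continue;                          // Already known to be in the loop.
      }
      (*in_loop)[block] = true;
      for (size_t pred : preds[block]) {   // Walk backwards towards the header.
        worklist.push_back(pred);
      }
    }
  }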
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 581c1d5..476f24e 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_NODES_H_
 #define ART_COMPILER_OPTIMIZING_NODES_H_
 
+#include "locations.h"
 #include "utils/allocation.h"
 #include "utils/arena_bit_vector.h"
 #include "utils/growable_array.h"
@@ -29,6 +30,7 @@
 class HIntConstant;
 class HGraphVisitor;
 class HPhi;
+class LiveInterval;
 class LocationSummary;
 
 static const int kDefaultNumberOfBlocks = 8;
@@ -49,6 +51,7 @@
 
   friend class HBasicBlock;
   friend class HInstructionIterator;
+  friend class HBackwardInstructionIterator;
 
   DISALLOW_COPY_AND_ASSIGN(HInstructionList);
 };
@@ -59,14 +62,14 @@
   explicit HGraph(ArenaAllocator* arena)
       : arena_(arena),
         blocks_(arena, kDefaultNumberOfBlocks),
-        dominator_order_(arena, kDefaultNumberOfBlocks),
+        reverse_post_order_(arena, kDefaultNumberOfBlocks),
         maximum_number_of_out_vregs_(0),
         number_of_vregs_(0),
         number_of_in_vregs_(0),
         current_instruction_id_(0) { }
 
   ArenaAllocator* GetArena() const { return arena_; }
-  const GrowableArray<HBasicBlock*>* GetBlocks() const { return &blocks_; }
+  const GrowableArray<HBasicBlock*>& GetBlocks() const { return blocks_; }
 
   HBasicBlock* GetEntryBlock() const { return entry_block_; }
   HBasicBlock* GetExitBlock() const { return exit_block_; }
@@ -80,6 +83,14 @@
   void TransformToSSA();
   void SimplifyCFG();
 
+  // Find all natural loops in this graph. Aborts computation and returns false
+  // if one loop is not natural, that is, its header does not dominate the back
+  // edge.
+  bool FindNaturalLoops() const;
+
+  void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor);
+  void SimplifyLoop(HBasicBlock* header);
+
   int GetNextInstructionId() {
     return current_instruction_id_++;
   }
@@ -108,8 +119,8 @@
     return number_of_in_vregs_;
   }
 
-  GrowableArray<HBasicBlock*>* GetDominatorOrder() {
-    return &dominator_order_;
+  const GrowableArray<HBasicBlock*>& GetReversePostOrder() const {
+    return reverse_post_order_;
   }
 
  private:
@@ -117,10 +128,10 @@
   void VisitBlockForDominatorTree(HBasicBlock* block,
                                   HBasicBlock* predecessor,
                                   GrowableArray<size_t>* visits);
-  void FindBackEdges(ArenaBitVector* visited) const;
+  void FindBackEdges(ArenaBitVector* visited);
   void VisitBlockForBackEdges(HBasicBlock* block,
                               ArenaBitVector* visited,
-                              ArenaBitVector* visiting) const;
+                              ArenaBitVector* visiting);
   void RemoveDeadBlocks(const ArenaBitVector& visited) const;
 
   ArenaAllocator* const arena_;
@@ -128,8 +139,8 @@
   // List of blocks in insertion order.
   GrowableArray<HBasicBlock*> blocks_;
 
-  // List of blocks to perform a pre-order dominator tree traversal.
-  GrowableArray<HBasicBlock*> dominator_order_;
+  // List of blocks to perform a reverse post order traversal.
+  GrowableArray<HBasicBlock*> reverse_post_order_;
 
   HBasicBlock* entry_block_;
   HBasicBlock* exit_block_;
@@ -153,34 +164,69 @@
  public:
   HLoopInformation(HBasicBlock* header, HGraph* graph)
       : header_(header),
-        back_edges_(graph->GetArena(), kDefaultNumberOfBackEdges) { }
+        back_edges_(graph->GetArena(), kDefaultNumberOfBackEdges),
+        blocks_(graph->GetArena(), graph->GetBlocks().Size(), false) {}
+
+  HBasicBlock* GetHeader() const {
+    return header_;
+  }
 
   void AddBackEdge(HBasicBlock* back_edge) {
     back_edges_.Add(back_edge);
   }
 
+  void RemoveBackEdge(HBasicBlock* back_edge) {
+    back_edges_.Delete(back_edge);
+  }
+
+  bool IsBackEdge(HBasicBlock* block) {
+    for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) {
+      if (back_edges_.Get(i) == block) return true;
+    }
+    return false;
+  }
+
   int NumberOfBackEdges() const {
     return back_edges_.Size();
   }
 
-  void SetPreHeader(HBasicBlock* block);
+  HBasicBlock* GetPreHeader() const;
 
-  HBasicBlock* GetPreHeader() const {
-    return pre_header_;
+  const GrowableArray<HBasicBlock*>& GetBackEdges() const {
+    return back_edges_;
   }
 
-  const GrowableArray<HBasicBlock*>* GetBackEdges() const {
-    return &back_edges_;
+  void ClearBackEdges() {
+    back_edges_.Reset();
   }
 
+  // Find blocks that are part of this loop. Returns whether the loop is a natural loop,
+  // that is, the header dominates the back edge.
+  bool Populate();
+
+  // Returns whether this loop information contains `block`.
+  // Note that this loop information *must* be populated before entering this function.
+  bool Contains(const HBasicBlock& block) const;
+
+  // Returns whether this loop information is an inner loop of `other`.
+  // Note that `other` *must* be populated before entering this function.
+  bool IsIn(const HLoopInformation& other) const;
+
+  const ArenaBitVector& GetBlocks() const { return blocks_; }
+
  private:
-  HBasicBlock* pre_header_;
+  // Internal recursive implementation of `Populate`.
+  void PopulateRecursive(HBasicBlock* block);
+
   HBasicBlock* header_;
   GrowableArray<HBasicBlock*> back_edges_;
+  ArenaBitVector blocks_;
 
   DISALLOW_COPY_AND_ASSIGN(HLoopInformation);
 };
 
+static constexpr size_t kNoLifetime = -1;
+
 // A block in a method. Contains the list of instructions represented
 // as a double linked list. Each block knows its predecessors and
 // successors.
@@ -192,20 +238,23 @@
         successors_(graph->GetArena(), kDefaultNumberOfSuccessors),
         loop_information_(nullptr),
         dominator_(nullptr),
-        block_id_(-1) { }
+        block_id_(-1),
+        lifetime_start_(kNoLifetime),
+        lifetime_end_(kNoLifetime) {}
 
-  const GrowableArray<HBasicBlock*>* GetPredecessors() const {
-    return &predecessors_;
+  const GrowableArray<HBasicBlock*>& GetPredecessors() const {
+    return predecessors_;
   }
 
-  const GrowableArray<HBasicBlock*>* GetSuccessors() const {
-    return &successors_;
+  const GrowableArray<HBasicBlock*>& GetSuccessors() const {
+    return successors_;
   }
 
   void AddBackEdge(HBasicBlock* back_edge) {
     if (loop_information_ == nullptr) {
       loop_information_ = new (graph_->GetArena()) HLoopInformation(this, graph_);
     }
+    DCHECK_EQ(loop_information_->GetHeader(), this);
     loop_information_->AddBackEdge(back_edge);
   }
 
@@ -225,8 +274,8 @@
 
   HInstruction* GetFirstInstruction() const { return instructions_.first_instruction_; }
   HInstruction* GetLastInstruction() const { return instructions_.last_instruction_; }
-  HInstructionList const* GetInstructions() const { return &instructions_; }
-  HInstructionList const* GetPhis() const { return &phis_; }
+  const HInstructionList& GetInstructions() const { return instructions_; }
+  const HInstructionList& GetPhis() const { return phis_; }
 
   void AddSuccessor(HBasicBlock* block) {
     successors_.Add(block);
@@ -240,19 +289,73 @@
     }
   }
 
+  void RemoveSuccessor(HBasicBlock* block, bool remove_in_predecessor = true) {
+    successors_.Delete(block);
+    if (remove_in_predecessor) {
+      block->predecessors_.Delete(this);
+    }
+  }
+
+  void ClearAllPredecessors() {
+    predecessors_.Reset();
+  }
+
+  void AddPredecessor(HBasicBlock* block) {
+    predecessors_.Add(block);
+    block->successors_.Add(this);
+  }
+
+  size_t GetPredecessorIndexOf(HBasicBlock* predecessor) {
+    for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) {
+      if (predecessors_.Get(i) == predecessor) {
+        return i;
+      }
+    }
+    return -1;
+  }
+
   void AddInstruction(HInstruction* instruction);
   void RemoveInstruction(HInstruction* instruction);
+  void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
   void AddPhi(HPhi* phi);
   void RemovePhi(HPhi* phi);
 
   bool IsLoopHeader() const {
-    return loop_information_ != nullptr;
+    return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this);
   }
 
   HLoopInformation* GetLoopInformation() const {
     return loop_information_;
   }
 
+  // Set the loop_information_ on this block. This method overrides the current
+  // loop_information if it is an outer loop of the passed loop information.
+  void SetInLoop(HLoopInformation* info) {
+    if (IsLoopHeader()) {
+      // Nothing to do. This just means `info` is an outer loop.
+    } else if (loop_information_ == nullptr) {
+      loop_information_ = info;
+    } else if (loop_information_->Contains(*info->GetHeader())) {
+      // Block is currently part of an outer loop. Make it part of this inner loop.
+      // Note that a non-loop header having loop information means this loop information
+      // has already been populated.
+      loop_information_ = info;
+    } else {
+      // Block is part of an inner loop. Do not update the loop information.
+      // Note that we cannot do the check `info->Contains(*loop_information_->GetHeader())`
+      // at this point, because this method is being called while populating `info`.
+    }
+  }
+
+  // Returns whether this block dominates the block passed as parameter.
+  bool Dominates(HBasicBlock* block) const;
+
+  size_t GetLifetimeStart() const { return lifetime_start_; }
+  size_t GetLifetimeEnd() const { return lifetime_end_; }
+
+  void SetLifetimeStart(size_t start) { lifetime_start_ = start; }
+  void SetLifetimeEnd(size_t end) { lifetime_end_ = end; }
+
  private:
   HGraph* const graph_;
   GrowableArray<HBasicBlock*> predecessors_;
@@ -262,6 +365,8 @@
   HLoopInformation* loop_information_;
   HBasicBlock* dominator_;
   int block_id_;
+  size_t lifetime_start_;
+  size_t lifetime_end_;
 
   DISALLOW_COPY_AND_ASSIGN(HBasicBlock);
 };
@@ -280,6 +385,7 @@
   M(NewInstance)                                           \
   M(Not)                                                   \
   M(ParameterValue)                                        \
+  M(ParallelMove)                                          \
   M(Phi)                                                   \
   M(Return)                                                \
   M(ReturnVoid)                                            \
@@ -322,10 +428,13 @@
         next_(nullptr),
         block_(nullptr),
         id_(-1),
+        ssa_index_(-1),
         uses_(nullptr),
         env_uses_(nullptr),
         environment_(nullptr),
-        locations_(nullptr) { }
+        locations_(nullptr),
+        live_interval_(nullptr),
+        lifetime_position_(kNoLifetime) {}
 
   virtual ~HInstruction() { }
 
@@ -360,11 +469,28 @@
   HUseListNode<HInstruction>* GetUses() const { return uses_; }
   HUseListNode<HEnvironment>* GetEnvUses() const { return env_uses_; }
 
-  bool HasUses() const { return uses_ != nullptr; }
+  bool HasUses() const { return uses_ != nullptr || env_uses_ != nullptr; }
+
+  size_t NumberOfUses() const {
+    // TODO: Optimize this method if it is used outside of the HGraphVisualizer.
+    size_t result = 0;
+    HUseListNode<HInstruction>* current = uses_;
+    while (current != nullptr) {
+      current = current->GetTail();
+      ++result;
+    }
+    return result;
+  }
 
   int GetId() const { return id_; }
   void SetId(int id) { id_ = id; }
 
+  int GetSsaIndex() const { return ssa_index_; }
+  void SetSsaIndex(int ssa_index) { ssa_index_ = ssa_index; }
+  bool HasSsaIndex() const { return ssa_index_ != -1; }
+
+  bool HasEnvironment() const { return environment_ != nullptr; }
+  HEnvironment* GetEnvironment() const { return environment_; }
   void SetEnvironment(HEnvironment* environment) { environment_ = environment; }
 
   LocationSummary* GetLocations() const { return locations_; }
@@ -378,6 +504,12 @@
   FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
 #undef INSTRUCTION_TYPE_CHECK
 
+  size_t GetLifetimePosition() const { return lifetime_position_; }
+  void SetLifetimePosition(size_t position) { lifetime_position_ = position; }
+  LiveInterval* GetLiveInterval() const { return live_interval_; }
+  void SetLiveInterval(LiveInterval* interval) { live_interval_ = interval; }
+  bool HasLiveInterval() const { return live_interval_ != nullptr; }
+
  private:
   HInstruction* previous_;
   HInstruction* next_;
@@ -388,6 +520,9 @@
   // has not been added to the graph.
   int id_;
 
+  // When doing liveness analysis, instructions that have uses get an SSA index.
+  int ssa_index_;
+
   // List of instructions that have this instruction as input.
   HUseListNode<HInstruction>* uses_;
 
@@ -399,6 +534,13 @@
   // Set by the code generator.
   LocationSummary* locations_;
 
+  // Set by the liveness analysis.
+  LiveInterval* live_interval_;
+
+  // Set by the liveness analysis: this is the position, in a linear
+  // order of blocks, where this instruction's live interval starts.
+  size_t lifetime_position_;
+
   friend class HBasicBlock;
   friend class HInstructionList;
 
@@ -494,6 +636,29 @@
  private:
   HInstruction* instruction_;
   HInstruction* next_;
+
+  DISALLOW_COPY_AND_ASSIGN(HInstructionIterator);
+};
+
+class HBackwardInstructionIterator : public ValueObject {
+ public:
+  explicit HBackwardInstructionIterator(const HInstructionList& instructions)
+      : instruction_(instructions.last_instruction_) {
+    next_ = Done() ? nullptr : instruction_->GetPrevious();
+  }
+
+  bool Done() const { return instruction_ == nullptr; }
+  HInstruction* Current() const { return instruction_; }
+  void Advance() {
+    instruction_ = next_;
+    next_ = Done() ? nullptr : instruction_->GetPrevious();
+  }
+
+ private:
+  HInstruction* instruction_;
+  HInstruction* next_;
+
+  DISALLOW_COPY_AND_ASSIGN(HBackwardInstructionIterator);
 };
 
 // An embedded container with N elements of type T.  Used (with partial
@@ -608,7 +773,7 @@
   HGoto() { }
 
   HBasicBlock* GetSuccessor() const {
-    return GetBlock()->GetSuccessors()->Get(0);
+    return GetBlock()->GetSuccessors().Get(0);
   }
 
   DECLARE_INSTRUCTION(Goto)
@@ -626,11 +791,11 @@
   }
 
   HBasicBlock* IfTrueSuccessor() const {
-    return GetBlock()->GetSuccessors()->Get(0);
+    return GetBlock()->GetSuccessors().Get(0);
   }
 
   HBasicBlock* IfFalseSuccessor() const {
-    return GetBlock()->GetSuccessors()->Get(1);
+    return GetBlock()->GetSuccessors().Get(1);
   }
 
   DECLARE_INSTRUCTION(If)
@@ -940,6 +1105,88 @@
   DISALLOW_COPY_AND_ASSIGN(HPhi);
 };
 
+class MoveOperands : public ArenaObject {
+ public:
+  MoveOperands(Location source, Location destination)
+      : source_(source), destination_(destination) {}
+
+  Location GetSource() const { return source_; }
+  Location GetDestination() const { return destination_; }
+
+  void SetSource(Location value) { source_ = value; }
+  void SetDestination(Location value) { destination_ = value; }
+
+  // The parallel move resolver marks moves as "in-progress" by clearing the
+  // destination (but not the source).
+  Location MarkPending() {
+    DCHECK(!IsPending());
+    Location dest = destination_;
+    destination_ = Location::NoLocation();
+    return dest;
+  }
+
+  void ClearPending(Location dest) {
+    DCHECK(IsPending());
+    destination_ = dest;
+  }
+
+  bool IsPending() const {
+    DCHECK(!source_.IsInvalid() || destination_.IsInvalid());
+    return destination_.IsInvalid() && !source_.IsInvalid();
+  }
+
+  // True if this blocks a move from the given location.
+  bool Blocks(Location loc) const {
+    return !IsEliminated() && source_.Equals(loc);
+  }
+
+  // A move is redundant if it's been eliminated, if its source and
+  // destination are the same, or if its destination is unneeded.
+  bool IsRedundant() const {
+    return IsEliminated() || destination_.IsInvalid() || source_.Equals(destination_);
+  }
+
+  // We clear both operands to indicate a move that has been eliminated.
+  void Eliminate() {
+    source_ = destination_ = Location::NoLocation();
+  }
+
+  bool IsEliminated() const {
+    DCHECK(!source_.IsInvalid() || destination_.IsInvalid());
+    return source_.IsInvalid();
+  }
+
+ private:
+  Location source_;
+  Location destination_;
+
+  DISALLOW_COPY_AND_ASSIGN(MoveOperands);
+};
+
+static constexpr size_t kDefaultNumberOfMoves = 4;
+
+class HParallelMove : public HTemplateInstruction<0> {
+ public:
+  explicit HParallelMove(ArenaAllocator* arena) : moves_(arena, kDefaultNumberOfMoves) {}
+
+  void AddMove(MoveOperands* move) {
+    moves_.Add(move);
+  }
+
+  MoveOperands* MoveOperandsAt(size_t index) const {
+    return moves_.Get(index);
+  }
+
+  size_t NumMoves() const { return moves_.Size(); }
+
+  DECLARE_INSTRUCTION(ParallelMove)
+
+ private:
+  GrowableArray<MoveOperands*> moves_;
+
+  DISALLOW_COPY_AND_ASSIGN(HParallelMove);
+};
+
 class HGraphVisitor : public ValueObject {
  public:
   explicit HGraphVisitor(HGraph* graph) : graph_(graph) { }
@@ -966,6 +1213,52 @@
   DISALLOW_COPY_AND_ASSIGN(HGraphVisitor);
 };
 
+class HInsertionOrderIterator : public ValueObject {
+ public:
+  explicit HInsertionOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {}
+
+  bool Done() const { return index_ == graph_.GetBlocks().Size(); }
+  HBasicBlock* Current() const { return graph_.GetBlocks().Get(index_); }
+  void Advance() { ++index_; }
+
+ private:
+  const HGraph& graph_;
+  size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HInsertionOrderIterator);
+};
+
+class HReversePostOrderIterator : public ValueObject {
+ public:
+  explicit HReversePostOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {}
+
+  bool Done() const { return index_ == graph_.GetReversePostOrder().Size(); }
+  HBasicBlock* Current() const { return graph_.GetReversePostOrder().Get(index_); }
+  void Advance() { ++index_; }
+
+ private:
+  const HGraph& graph_;
+  size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HReversePostOrderIterator);
+};
+
+class HPostOrderIterator : public ValueObject {
+ public:
+  explicit HPostOrderIterator(const HGraph& graph)
+      : graph_(graph), index_(graph_.GetReversePostOrder().Size()) {}
+
+  bool Done() const { return index_ == 0; }
+  HBasicBlock* Current() const { return graph_.GetReversePostOrder().Get(index_ - 1); }
+  void Advance() { --index_; }
+
+ private:
+  const HGraph& graph_;
+  size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HPostOrderIterator);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_H_
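For reference, the iterators introduced at the end of this header are meant to replace the raw index loops over GetBlocks() and GetReversePostOrder(). A typical traversal, shown purely as an illustration and assuming `graph` is an HGraph* (the SSA builder changes later in this patch use the same block-order pattern):

  // Visit blocks in reverse post order; within each block, visit the
  // non-phi instructions from last to first.
  for (HReversePostOrderIterator it(*graph); !it.Done(); it.Advance()) {
    HBasicBlock* block = it.Current();
    for (HBackwardInstructionIterator back_it(block->GetInstructions());
         !back_it.Done();
         back_it.Advance()) {
      HInstruction* current = back_it.Current();
      // ... process `current` ...
    }
  }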
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 9438890..286f48a 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <fstream>
 #include <stdint.h>
 
 #include "builder.h"
@@ -21,7 +22,9 @@
 #include "compilers.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
+#include "graph_visualizer.h"
 #include "nodes.h"
+#include "ssa_liveness_analysis.h"
 #include "utils/arena_allocator.h"
 
 namespace art {
@@ -49,9 +52,24 @@
   DISALLOW_COPY_AND_ASSIGN(CodeVectorAllocator);
 };
 
+/**
+ * If set to true, generates a file suitable for the c1visualizer tool and IRHydra.
+ */
+static bool kIsVisualizerEnabled = false;
 
-CompiledMethod* OptimizingCompiler::TryCompile(CompilerDriver& driver,
-                                               const DexFile::CodeItem* code_item,
+/**
+ * Filter to apply to the visualizer. Methods whose name contains that filter will
+ * be in the file.
+ */
+static const char* kStringFilter = "";
+
+OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) : QuickCompiler(driver) {
+  if (kIsVisualizerEnabled) {
+    visualizer_output_.reset(new std::ofstream("art.cfg"));
+  }
+}
+
+CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item,
                                                uint32_t access_flags,
                                                InvokeType invoke_type,
                                                uint16_t class_def_idx,
@@ -60,7 +78,8 @@
                                                const DexFile& dex_file) const {
   DexCompilationUnit dex_compilation_unit(
     nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item,
-    class_def_idx, method_idx, access_flags, driver.GetVerifiedMethod(&dex_file, method_idx));
+    class_def_idx, method_idx, access_flags,
+    GetCompilerDriver()->GetVerifiedMethod(&dex_file, method_idx));
 
   // For testing purposes, we put a special marker on method names that should be compiled
   // with this compiler. This makes sure we're not regressing.
@@ -69,6 +88,7 @@
   ArenaPool pool;
   ArenaAllocator arena(&pool);
   HGraphBuilder builder(&arena, &dex_compilation_unit, &dex_file);
+
   HGraph* graph = builder.BuildGraph(*code_item);
   if (graph == nullptr) {
     if (shouldCompile) {
@@ -76,8 +96,10 @@
     }
     return nullptr;
   }
+  HGraphVisualizer visualizer(visualizer_output_.get(), graph, kStringFilter, dex_compilation_unit);
+  visualizer.DumpGraph("builder");
 
-  InstructionSet instruction_set = driver.GetInstructionSet();
+  InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet();
   // The optimizing compiler currently does not have a Thumb2 assembler.
   if (instruction_set == kThumb2) {
     instruction_set = kArm;
@@ -103,8 +125,13 @@
   // Run these phases to get some test coverage.
   graph->BuildDominatorTree();
   graph->TransformToSSA();
+  visualizer.DumpGraph("ssa");
 
-  return new CompiledMethod(driver,
+  graph->FindNaturalLoops();
+  SsaLivenessAnalysis(*graph).Analyze();
+  visualizer.DumpGraph("liveness");
+
+  return new CompiledMethod(GetCompilerDriver(),
                             instruction_set,
                             allocator.GetMemory(),
                             codegen->GetFrameSize(),
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
new file mode 100644
index 0000000..3d2d136
--- /dev/null
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "parallel_move_resolver.h"
+#include "nodes.h"
+#include "locations.h"
+
+namespace art {
+
+void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) {
+  DCHECK(moves_.IsEmpty());
+  // Build up a worklist of moves.
+  BuildInitialMoveList(parallel_move);
+
+  for (size_t i = 0; i < moves_.Size(); ++i) {
+    const MoveOperands& move = *moves_.Get(i);
+    // Skip constants to perform them last.  They don't block other moves
+    // and skipping such moves with register destinations keeps those
+    // registers free for the whole algorithm.
+    if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
+      PerformMove(i);
+    }
+  }
+
+  // Perform the moves with constant sources.
+  for (size_t i = 0; i < moves_.Size(); ++i) {
+    const MoveOperands& move = *moves_.Get(i);
+    if (!move.IsEliminated()) {
+      DCHECK(move.GetSource().IsConstant());
+      EmitMove(i);
+    }
+  }
+
+  moves_.Reset();
+}
+
+
+void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
+  // Perform a linear sweep of the moves to add them to the initial list of
+  // moves to perform, ignoring any move that is redundant (the source is
+  // the same as the destination, the destination is ignored and
+  // unallocated, or the move was already eliminated).
+  for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
+    MoveOperands* move = parallel_move->MoveOperandsAt(i);
+    if (!move->IsRedundant()) {
+      moves_.Add(move);
+    }
+  }
+}
+
+
+void ParallelMoveResolver::PerformMove(size_t index) {
+  // Each call to this function performs a move and deletes it from the move
+  // graph.  We first recursively perform any move blocking this one.  We
+  // mark a move as "pending" on entry to PerformMove in order to detect
+  // cycles in the move graph.  We use operand swaps to resolve cycles,
+  // which means that a call to PerformMove could change any source operand
+  // in the move graph.
+
+  DCHECK(!moves_.Get(index)->IsPending());
+  DCHECK(!moves_.Get(index)->IsRedundant());
+
+  // Clear this move's destination to indicate a pending move.  The actual
+  // destination is saved in a stack-allocated local.  Recursion may allow
+  // multiple moves to be pending.
+  DCHECK(!moves_.Get(index)->GetSource().IsInvalid());
+  Location destination = moves_.Get(index)->MarkPending();
+
+  // Perform a depth-first traversal of the move graph to resolve
+  // dependencies.  Any unperformed, unpending move with a source the same
+  // as this one's destination blocks this one so recursively perform all
+  // such moves.
+  for (size_t i = 0; i < moves_.Size(); ++i) {
+    const MoveOperands& other_move = *moves_.Get(i);
+    if (other_move.Blocks(destination) && !other_move.IsPending()) {
+      // Though PerformMove can change any source operand in the move graph,
+      // this call cannot create a blocking move via a swap (this loop does
+      // not miss any).  Assume there is a non-blocking move with source A
+      // and this move is blocked on source B and there is a swap of A and
+      // B.  Then A and B must be involved in the same cycle (or they would
+      // not be swapped).  Since this move's destination is B and there is
+      // only a single incoming edge to an operand, this move must also be
+      // involved in the same cycle.  In that case, the blocking move will
+      // be created but will be "pending" when we return from PerformMove.
+      PerformMove(i);
+    }
+  }
+  MoveOperands* move = moves_.Get(index);
+
+  // We are about to resolve this move and don't need it marked as
+  // pending, so restore its destination.
+  move->ClearPending(destination);
+
+  // This move's source may have changed due to swaps to resolve cycles and
+  // so it may now be the last move in the cycle.  If so remove it.
+  if (move->GetSource().Equals(destination)) {
+    move->Eliminate();
+    return;
+  }
+
+  // The move may be blocked on a (at most one) pending move, in which case
+  // we have a cycle.  Search for such a blocking move and perform a swap to
+  // resolve it.
+  bool do_swap = false;
+  for (size_t i = 0; i < moves_.Size(); ++i) {
+    const MoveOperands& other_move = *moves_.Get(i);
+    if (other_move.Blocks(destination)) {
+      DCHECK(other_move.IsPending());
+      do_swap = true;
+      break;
+    }
+  }
+
+  if (do_swap) {
+    EmitSwap(index);
+    // Any unperformed (including pending) move with a source of either
+    // this move's source or destination needs to have their source
+    // changed to reflect the state of affairs after the swap.
+    Location source = move->GetSource();
+    Location destination = move->GetDestination();
+    move->Eliminate();
+    for (size_t i = 0; i < moves_.Size(); ++i) {
+      const MoveOperands& other_move = *moves_.Get(i);
+      if (other_move.Blocks(source)) {
+        moves_.Get(i)->SetSource(destination);
+      } else if (other_move.Blocks(destination)) {
+        moves_.Get(i)->SetSource(source);
+      }
+    }
+  } else {
+    // This move is not blocked.
+    EmitMove(index);
+    move->Eliminate();
+  }
+}
+
+}  // namespace art
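As a concrete illustration of the cycle handling (the unit tests added below exercise similar cases, and this sketch reuses their TestParallelMoveResolver and BuildParallelMove helpers), a pure three-register rotation gets resolved with swaps rather than plain moves:

  TestParallelMoveResolver resolver(&allocator);
  static constexpr size_t moves[][2] = {{1, 2}, {2, 3}, {3, 1}};
  resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
  // PerformMove(0) marks (1 -> 2) pending, recurses through (2 -> 3) into (3 -> 1),
  // where the cycle is detected; the expected emission is "(3 <-> 1) (2 <-> 3)",
  // after which the remaining move is redundant and gets eliminated.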
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
new file mode 100644
index 0000000..ff20cb0
--- /dev/null
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_
+#define ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_
+
+#include "utils/allocation.h"
+#include "utils/growable_array.h"
+
+namespace art {
+
+class HParallelMove;
+class MoveOperands;
+
+/**
+ * Helper class to resolve a set of parallel moves. Architecture-dependent code
+ * generators must have their own subclass that implements the `EmitMove` and `EmitSwap`
+ * operations.
+ */
+class ParallelMoveResolver : public ValueObject {
+ public:
+  explicit ParallelMoveResolver(ArenaAllocator* allocator) : moves_(allocator, 32) {}
+  virtual ~ParallelMoveResolver() {}
+
+  // Resolve a set of parallel moves, emitting assembler instructions.
+  void EmitNativeCode(HParallelMove* parallel_move);
+
+ protected:
+  // Emit a move.
+  virtual void EmitMove(size_t index) = 0;
+
+  // Execute a move by emitting a swap of two operands.
+  virtual void EmitSwap(size_t index) = 0;
+
+  // List of moves not yet resolved.
+  GrowableArray<MoveOperands*> moves_;
+
+ private:
+  // Build the initial list of moves.
+  void BuildInitialMoveList(HParallelMove* parallel_move);
+
+  // Perform the move at the moves_ index in question (possibly requiring
+  // other moves to satisfy dependencies).
+  void PerformMove(size_t index);
+
+  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_
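A backend subclass only needs to provide the two emission hooks; the ordering and cycle detection live entirely in EmitNativeCode(). A hedged sketch of such a subclass follows (the class name is hypothetical and the emission itself is left as comments, since a real implementation would call into the architecture's assembler; the test resolver added below has the same shape):

  class MyArchParallelMoveResolver : public ParallelMoveResolver {
   public:
    explicit MyArchParallelMoveResolver(ArenaAllocator* allocator)
        : ParallelMoveResolver(allocator) {}

   protected:
    virtual void EmitMove(size_t index) {
      MoveOperands* move = moves_.Get(index);
      DCHECK(!move->IsEliminated());
      // Emit a single architecture move from move->GetSource()
      // to move->GetDestination() here.
    }

    virtual void EmitSwap(size_t index) {
      MoveOperands* move = moves_.Get(index);
      DCHECK(!move->IsEliminated());
      // Emit an exchange of move->GetSource() and move->GetDestination() here.
    }

   private:
    DISALLOW_COPY_AND_ASSIGN(MyArchParallelMoveResolver);
  };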
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
new file mode 100644
index 0000000..88df24d
--- /dev/null
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nodes.h"
+#include "parallel_move_resolver.h"
+#include "utils/arena_allocator.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+class TestParallelMoveResolver : public ParallelMoveResolver {
+ public:
+  explicit TestParallelMoveResolver(ArenaAllocator* allocator) : ParallelMoveResolver(allocator) {}
+
+  virtual void EmitMove(size_t index) {
+    MoveOperands* move = moves_.Get(index);
+    if (!message_.str().empty()) {
+      message_ << " ";
+    }
+    message_ << "("
+             << move->GetSource().reg().RegId()
+             << " -> "
+             << move->GetDestination().reg().RegId()
+             << ")";
+  }
+
+  virtual void EmitSwap(size_t index) {
+    MoveOperands* move = moves_.Get(index);
+    if (!message_.str().empty()) {
+      message_ << " ";
+    }
+    message_ << "("
+             << move->GetSource().reg().RegId()
+             << " <-> "
+             << move->GetDestination().reg().RegId()
+             << ")";
+  }
+
+  std::string GetMessage() const {
+    return message_.str();
+  }
+
+ private:
+  std::ostringstream message_;
+
+
+  DISALLOW_COPY_AND_ASSIGN(TestParallelMoveResolver);
+};
+
+static HParallelMove* BuildParallelMove(ArenaAllocator* allocator,
+                                        const size_t operands[][2],
+                                        size_t number_of_moves) {
+  HParallelMove* moves = new (allocator) HParallelMove(allocator);
+  for (size_t i = 0; i < number_of_moves; ++i) {
+    moves->AddMove(new (allocator) MoveOperands(
+        Location::RegisterLocation(ManagedRegister(operands[i][0])),
+        Location::RegisterLocation(ManagedRegister(operands[i][1]))));
+  }
+  return moves;
+}
+
+TEST(ParallelMoveTest, Dependency) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  {
+    TestParallelMoveResolver resolver(&allocator);
+    static constexpr size_t moves[][2] = {{0, 1}, {1, 2}};
+    resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
+    ASSERT_STREQ("(1 -> 2) (0 -> 1)", resolver.GetMessage().c_str());
+  }
+
+  {
+    TestParallelMoveResolver resolver(&allocator);
+    static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {1, 4}};
+    resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
+    ASSERT_STREQ("(2 -> 3) (1 -> 2) (1 -> 4) (0 -> 1)", resolver.GetMessage().c_str());
+  }
+}
+
+TEST(ParallelMoveTest, Swap) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  {
+    TestParallelMoveResolver resolver(&allocator);
+    static constexpr size_t moves[][2] = {{0, 1}, {1, 0}};
+    resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
+    ASSERT_STREQ("(1 <-> 0)", resolver.GetMessage().c_str());
+  }
+
+  {
+    TestParallelMoveResolver resolver(&allocator);
+    static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {1, 0}};
+    resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
+    ASSERT_STREQ("(1 -> 2) (1 <-> 0)", resolver.GetMessage().c_str());
+  }
+
+  {
+    TestParallelMoveResolver resolver(&allocator);
+    static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 1}};
+    resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
+    ASSERT_STREQ("(4 <-> 1) (3 <-> 4) (2 <-> 3) (0 -> 1)", resolver.GetMessage().c_str());
+  }
+
+  {
+    TestParallelMoveResolver resolver(&allocator);
+    static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 1}, {5, 4}};
+    resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
+    ASSERT_STREQ("(4 <-> 1) (3 <-> 4) (2 <-> 3) (0 -> 1) (5 -> 4)", resolver.GetMessage().c_str());
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h
index c82d0cc..a7727c0 100644
--- a/compiler/optimizing/pretty_printer.h
+++ b/compiler/optimizing/pretty_printer.h
@@ -70,23 +70,23 @@
   virtual void VisitBasicBlock(HBasicBlock* block) {
     PrintString("BasicBlock ");
     PrintInt(block->GetBlockId());
-    const GrowableArray<HBasicBlock*>* blocks = block->GetPredecessors();
-    if (!blocks->IsEmpty()) {
+    const GrowableArray<HBasicBlock*>& predecessors = block->GetPredecessors();
+    if (!predecessors.IsEmpty()) {
       PrintString(", pred: ");
-      for (size_t i = 0; i < blocks->Size() -1; i++) {
-        PrintInt(blocks->Get(i)->GetBlockId());
+      for (size_t i = 0; i < predecessors.Size() - 1; i++) {
+        PrintInt(predecessors.Get(i)->GetBlockId());
         PrintString(", ");
       }
-      PrintInt(blocks->Peek()->GetBlockId());
+      PrintInt(predecessors.Peek()->GetBlockId());
     }
-    blocks = block->GetSuccessors();
-    if (!blocks->IsEmpty()) {
+    const GrowableArray<HBasicBlock*>& successors = block->GetSuccessors();
+    if (!successors.IsEmpty()) {
       PrintString(", succ: ");
-      for (size_t i = 0; i < blocks->Size() - 1; i++) {
-        PrintInt(blocks->Get(i)->GetBlockId());
+      for (size_t i = 0; i < successors.Size() - 1; i++) {
+        PrintInt(successors.Get(i)->GetBlockId());
         PrintString(", ");
       }
-      PrintInt(blocks->Peek()->GetBlockId());
+      PrintInt(successors.Peek()->GetBlockId());
     }
     PrintNewLine();
     HGraphVisitor::VisitBasicBlock(block);
@@ -100,6 +100,47 @@
   DISALLOW_COPY_AND_ASSIGN(HPrettyPrinter);
 };
 
+class StringPrettyPrinter : public HPrettyPrinter {
+ public:
+  explicit StringPrettyPrinter(HGraph* graph)
+      : HPrettyPrinter(graph), str_(""), current_block_(nullptr) { }
+
+  virtual void PrintInt(int value) {
+    str_ += StringPrintf("%d", value);
+  }
+
+  virtual void PrintString(const char* value) {
+    str_ += value;
+  }
+
+  virtual void PrintNewLine() {
+    str_ += '\n';
+  }
+
+  void Clear() { str_.clear(); }
+
+  std::string str() const { return str_; }
+
+  virtual void VisitBasicBlock(HBasicBlock* block) {
+    current_block_ = block;
+    HPrettyPrinter::VisitBasicBlock(block);
+  }
+
+  virtual void VisitGoto(HGoto* gota) {
+    PrintString("  ");
+    PrintInt(gota->GetId());
+    PrintString(": Goto ");
+    PrintInt(current_block_->GetSuccessors().Get(0)->GetBlockId());
+    PrintNewLine();
+  }
+
+ private:
+  std::string str_;
+  HBasicBlock* current_block_;
+
+  DISALLOW_COPY_AND_ASSIGN(StringPrettyPrinter);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_PRETTY_PRINTER_H_
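StringPrettyPrinter moves into this header so it can be shared by other tests instead of living only in pretty_printer_test.cc. A typical use, sketched from the way the pretty printer tests drive it (`graph` and `expected` are assumed to come from the surrounding test scaffolding, which is not shown in this patch):

  StringPrettyPrinter printer(graph);
  printer.VisitInsertionOrder();
  ASSERT_STREQ(expected, printer.str().c_str());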
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc
index 04db7a6..7e604e9 100644
--- a/compiler/optimizing/pretty_printer_test.cc
+++ b/compiler/optimizing/pretty_printer_test.cc
@@ -27,47 +27,6 @@
 
 namespace art {
 
-class StringPrettyPrinter : public HPrettyPrinter {
- public:
-  explicit StringPrettyPrinter(HGraph* graph)
-      : HPrettyPrinter(graph), str_(""), current_block_(nullptr) { }
-
-  virtual void PrintInt(int value) {
-    str_ += StringPrintf("%d", value);
-  }
-
-  virtual void PrintString(const char* value) {
-    str_ += value;
-  }
-
-  virtual void PrintNewLine() {
-    str_ += '\n';
-  }
-
-  void Clear() { str_.clear(); }
-
-  std::string str() const { return str_; }
-
-  virtual void VisitBasicBlock(HBasicBlock* block) {
-    current_block_ = block;
-    HPrettyPrinter::VisitBasicBlock(block);
-  }
-
-  virtual void VisitGoto(HGoto* gota) {
-    PrintString("  ");
-    PrintInt(gota->GetId());
-    PrintString(": Goto ");
-    PrintInt(current_block_->GetSuccessors()->Get(0)->GetBlockId());
-    PrintNewLine();
-  }
-
- private:
-  std::string str_;
-  HBasicBlock* current_block_;
-
-  DISALLOW_COPY_AND_ASSIGN(StringPrettyPrinter);
-};
-
 static void TestCode(const uint16_t* data, const char* expected) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index bfb4f38..50e3254 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -20,27 +20,27 @@
 namespace art {
 
 void SsaBuilder::BuildSsa() {
-  // 1) Visit in dominator order. We need to have all predecessors of a block visited
+  // 1) Visit in reverse post order. We need to have all predecessors of a block visited
   // (with the exception of loops) in order to create the right environment for that
   // block. For loops, we create phis whose inputs will be set in 2).
-  for (size_t i = 0; i < GetGraph()->GetDominatorOrder()->Size(); i++) {
-    VisitBasicBlock(GetGraph()->GetDominatorOrder()->Get(i));
+  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+    VisitBasicBlock(it.Current());
   }
 
   // 2) Set inputs of loop phis.
   for (size_t i = 0; i < loop_headers_.Size(); i++) {
     HBasicBlock* block = loop_headers_.Get(i);
-    for (HInstructionIterator it(*block->GetPhis()); !it.Done(); it.Advance()) {
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
       HPhi* phi = it.Current()->AsPhi();
-      for (size_t pred = 0; pred < block->GetPredecessors()->Size(); pred++) {
-        phi->AddInput(ValueOfLocal(block->GetPredecessors()->Get(pred), phi->GetRegNumber()));
+      for (size_t pred = 0; pred < block->GetPredecessors().Size(); pred++) {
+        phi->AddInput(ValueOfLocal(block->GetPredecessors().Get(pred), phi->GetRegNumber()));
       }
     }
   }
 
   // 3) Clear locals.
   // TODO: Move this to a dead code eliminator phase.
-  for (HInstructionIterator it(*GetGraph()->GetEntryBlock()->GetInstructions());
+  for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
        !it.Done();
        it.Advance()) {
     HInstruction* current = it.Current();
@@ -59,7 +59,7 @@
 
   if (block->IsLoopHeader()) {
     // If the block is a loop header, we know we only have visited the pre header
-    // because we are visiting in dominator order. We create phis for all initialized
+    // because we are visiting in reverse post order. We create phis for all initialized
     // locals from the pre header. Their inputs will be populated at the end of
     // the analysis.
     for (size_t local = 0; local < current_locals_->Size(); local++) {
@@ -75,14 +75,14 @@
     // Save the loop header so that the last phase of the analysis knows which
     // blocks need to be updated.
     loop_headers_.Add(block);
-  } else if (block->GetPredecessors()->Size() > 0) {
-    // All predecessors have already been visited because we are visiting in dominator order.
+  } else if (block->GetPredecessors().Size() > 0) {
+    // All predecessors have already been visited because we are visiting in reverse post order.
     // We merge the values of all locals, creating phis if those values differ.
     for (size_t local = 0; local < current_locals_->Size(); local++) {
       bool is_different = false;
-      HInstruction* value = ValueOfLocal(block->GetPredecessors()->Get(0), local);
-      for (size_t i = 1; i < block->GetPredecessors()->Size(); i++) {
-        if (ValueOfLocal(block->GetPredecessors()->Get(i), local) != value) {
+      HInstruction* value = ValueOfLocal(block->GetPredecessors().Get(0), local);
+      for (size_t i = 1; i < block->GetPredecessors().Size(); i++) {
+        if (ValueOfLocal(block->GetPredecessors().Get(i), local) != value) {
           is_different = true;
           break;
         }
@@ -90,9 +90,9 @@
       if (is_different) {
         // TODO: Compute union type.
         HPhi* phi = new (GetGraph()->GetArena()) HPhi(
-            GetGraph()->GetArena(), local, block->GetPredecessors()->Size(), Primitive::kPrimVoid);
-        for (size_t i = 0; i < block->GetPredecessors()->Size(); i++) {
-          phi->SetRawInputAt(i, ValueOfLocal(block->GetPredecessors()->Get(i), local));
+            GetGraph()->GetArena(), local, block->GetPredecessors().Size(), Primitive::kPrimVoid);
+        for (size_t i = 0; i < block->GetPredecessors().Size(); i++) {
+          phi->SetRawInputAt(i, ValueOfLocal(block->GetPredecessors().Get(i), local));
         }
         block->AddPhi(phi);
         value = phi;
@@ -106,7 +106,7 @@
   // - HStoreLocal: update current value of the local and remove the instruction.
   // - Instructions that require an environment: populate their environment
   //   with the current values of the locals.
-  for (HInstructionIterator it(*block->GetInstructions()); !it.Done(); it.Advance()) {
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
     it.Current()->Accept(this);
   }
 }
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index b6c6c0b..9d8c072 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -29,8 +29,8 @@
       : HGraphVisitor(graph),
         current_locals_(nullptr),
         loop_headers_(graph->GetArena(), kDefaultNumberOfLoops),
-        locals_for_(graph->GetArena(), graph->GetBlocks()->Size()) {
-    locals_for_.SetSize(graph->GetBlocks()->Size());
+        locals_for_(graph->GetArena(), graph->GetBlocks().Size()) {
+    locals_for_.SetSize(graph->GetBlocks().Size());
   }
 
   void BuildSsa();
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
new file mode 100644
index 0000000..938c5ec
--- /dev/null
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ssa_liveness_analysis.h"
+#include "nodes.h"
+
+namespace art {
+
+void SsaLivenessAnalysis::Analyze() {
+  LinearizeGraph();
+  NumberInstructions();
+  ComputeLiveness();
+}
+
+static bool IsLoopExit(HLoopInformation* current, HLoopInformation* to) {
+  // `to` is either not part of a loop, or `current` is an inner loop of `to`.
+  return to == nullptr || (current != to && current->IsIn(*to));
+}
+
+static bool IsLoop(HLoopInformation* info) {
+  return info != nullptr;
+}
+
+static bool InSameLoop(HLoopInformation* first_loop, HLoopInformation* second_loop) {
+  return first_loop == second_loop;
+}
+
+static bool IsInnerLoop(HLoopInformation* outer, HLoopInformation* inner) {
+  return (inner != outer)
+      && (inner != nullptr)
+      && (outer != nullptr)
+      && inner->IsIn(*outer);
+}
+
+static void VisitBlockForLinearization(HBasicBlock* block,
+                                       GrowableArray<HBasicBlock*>* order,
+                                       ArenaBitVector* visited) {
+  if (visited->IsBitSet(block->GetBlockId())) {
+    return;
+  }
+  visited->SetBit(block->GetBlockId());
+  size_t number_of_successors = block->GetSuccessors().Size();
+  if (number_of_successors == 0) {
+    // Nothing to do.
+  } else if (number_of_successors == 1) {
+    VisitBlockForLinearization(block->GetSuccessors().Get(0), order, visited);
+  } else {
+    DCHECK_EQ(number_of_successors, 2u);
+    HBasicBlock* first_successor = block->GetSuccessors().Get(0);
+    HBasicBlock* second_successor = block->GetSuccessors().Get(1);
+    HLoopInformation* my_loop = block->GetLoopInformation();
+    HLoopInformation* first_loop = first_successor->GetLoopInformation();
+    HLoopInformation* second_loop = second_successor->GetLoopInformation();
+
+    if (!IsLoop(my_loop)) {
+      // Nothing to do. Current order is fine.
+    } else if (IsLoopExit(my_loop, second_loop) && InSameLoop(my_loop, first_loop)) {
+      // Visit the loop exit first in post order.
+      std::swap(first_successor, second_successor);
+    } else if (IsInnerLoop(my_loop, first_loop) && !IsInnerLoop(my_loop, second_loop)) {
+      // Visit the inner loop last in post order.
+      std::swap(first_successor, second_successor);
+    }
+    VisitBlockForLinearization(first_successor, order, visited);
+    VisitBlockForLinearization(second_successor, order, visited);
+  }
+  order->Add(block);
+}
+
+class HLinearOrderIterator : public ValueObject {
+ public:
+  explicit HLinearOrderIterator(const GrowableArray<HBasicBlock*>& post_order)
+      : post_order_(post_order), index_(post_order.Size()) {}
+
+  bool Done() const { return index_ == 0; }
+  HBasicBlock* Current() const { return post_order_.Get(index_ - 1); }
+  void Advance() { --index_; DCHECK_GE(index_, 0U); }
+
+ private:
+  const GrowableArray<HBasicBlock*>& post_order_;
+  size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator);
+};
+
+class HLinearPostOrderIterator : public ValueObject {
+ public:
+  explicit HLinearPostOrderIterator(const GrowableArray<HBasicBlock*>& post_order)
+      : post_order_(post_order), index_(0) {}
+
+  bool Done() const { return index_ == post_order_.Size(); }
+  HBasicBlock* Current() const { return post_order_.Get(index_); }
+  void Advance() { ++index_; }
+
+ private:
+  const GrowableArray<HBasicBlock*>& post_order_;
+  size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator);
+};
+
+void SsaLivenessAnalysis::LinearizeGraph() {
+  // For simplicity of the implementation, we create a linear post order. The order
+  // used for computing live ranges is the reverse of that order.
+  ArenaBitVector visited(graph_.GetArena(), graph_.GetBlocks().Size(), false);
+  VisitBlockForLinearization(graph_.GetEntryBlock(), &linear_post_order_, &visited);
+}
+
+void SsaLivenessAnalysis::NumberInstructions() {
+  int ssa_index = 0;
+  size_t lifetime_position = 0;
+  // Each instruction gets an individual lifetime position, and a block gets a lifetime
+  // start and end position. Non-phi instructions have a lifetime position distinct from
+  // the block they are in. Phi instructions use the lifetime start of their block as
+  // their lifetime position.
+  for (HLinearOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    block->SetLifetimeStart(++lifetime_position);
+
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      HInstruction* current = it.Current();
+      if (current->HasUses()) {
+        instructions_from_ssa_index_.Add(current);
+        current->SetSsaIndex(ssa_index++);
+        current->SetLiveInterval(new (graph_.GetArena()) LiveInterval(graph_.GetArena()));
+      }
+      current->SetLifetimePosition(lifetime_position);
+    }
+
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* current = it.Current();
+      if (current->HasUses()) {
+        instructions_from_ssa_index_.Add(current);
+        current->SetSsaIndex(ssa_index++);
+        current->SetLiveInterval(new (graph_.GetArena()) LiveInterval(graph_.GetArena()));
+      }
+      current->SetLifetimePosition(++lifetime_position);
+    }
+
+    block->SetLifetimeEnd(++lifetime_position);
+  }
+  number_of_ssa_values_ = ssa_index;
+}
+
+void SsaLivenessAnalysis::ComputeLiveness() {
+  for (HLinearOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    block_infos_.Put(
+        block->GetBlockId(),
+        new (graph_.GetArena()) BlockInfo(graph_.GetArena(), *block, number_of_ssa_values_));
+  }
+
+  // Compute the live ranges, as well as the initial live_in, live_out, and kill sets.
+  // This method does not handle backward branches for the sets; therefore the live_in
+  // and live_out sets are not yet correct.
+  ComputeLiveRanges();
+
+  // Do a fixed point calculation to take into account backward branches, which
+  // will update the live_in of loop headers, and therefore the live_out and
+  // live_in of blocks in the loop.
+  ComputeLiveInAndLiveOutSets();
+}
+
+void SsaLivenessAnalysis::ComputeLiveRanges() {
+  // Do a post order visit, marking the inputs of each instruction as live in the
+  // block that contains the instruction, and killing instructions as they are visited.
+  for (HLinearPostOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+
+    BitVector* kill = GetKillSet(*block);
+    BitVector* live_in = GetLiveInSet(*block);
+
+    // Mark the phi inputs of this block's successors that correspond to this block
+    // as live_in.
+    for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) {
+      HBasicBlock* successor = block->GetSuccessors().Get(i);
+      live_in->Union(GetLiveInSet(*successor));
+      size_t phi_input_index = successor->GetPredecessorIndexOf(block);
+      for (HInstructionIterator it(successor->GetPhis()); !it.Done(); it.Advance()) {
+        HInstruction* input = it.Current()->InputAt(phi_input_index);
+        live_in->SetBit(input->GetSsaIndex());
+      }
+    }
+
+    // Add a range covering this block to each instruction that is live_in because of successors.
+    for (uint32_t idx : live_in->Indexes()) {
+      HInstruction* current = instructions_from_ssa_index_.Get(idx);
+      current->GetLiveInterval()->AddRange(block->GetLifetimeStart(), block->GetLifetimeEnd());
+    }
+
+    for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* current = it.Current();
+      if (current->HasSsaIndex()) {
+        // Kill the instruction and shorten its interval.
+        kill->SetBit(current->GetSsaIndex());
+        live_in->ClearBit(current->GetSsaIndex());
+        current->GetLiveInterval()->SetFrom(current->GetLifetimePosition());
+      }
+
+      // All inputs of an instruction must be live.
+      for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
+        HInstruction* input = current->InputAt(i);
+        DCHECK(input->HasSsaIndex());
+        live_in->SetBit(input->GetSsaIndex());
+        input->GetLiveInterval()->AddUse(current);
+      }
+
+      if (current->HasEnvironment()) {
+        // All instructions in the environment must be live.
+        GrowableArray<HInstruction*>* environment = current->GetEnvironment()->GetVRegs();
+        for (size_t i = 0, e = environment->Size(); i < e; ++i) {
+          HInstruction* instruction = environment->Get(i);
+          if (instruction != nullptr) {
+            DCHECK(instruction->HasSsaIndex());
+            live_in->SetBit(instruction->GetSsaIndex());
+            instruction->GetLiveInterval()->AddUse(current);
+          }
+        }
+      }
+    }
+
+    // Kill phis defined in this block.
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      HInstruction* current = it.Current();
+      if (current->HasSsaIndex()) {
+        kill->SetBit(current->GetSsaIndex());
+        live_in->ClearBit(current->GetSsaIndex());
+      }
+    }
+
+    if (block->IsLoopHeader()) {
+      HBasicBlock* back_edge = block->GetLoopInformation()->GetBackEdges().Get(0);
+      // For all live_in instructions at the loop header, we need to create a range
+      // that covers the full loop.
+      for (uint32_t idx : live_in->Indexes()) {
+        HInstruction* current = instructions_from_ssa_index_.Get(idx);
+        current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(),
+                                                 back_edge->GetLifetimeEnd());
+      }
+    }
+  }
+}
+
+void SsaLivenessAnalysis::ComputeLiveInAndLiveOutSets() {
+  bool changed;
+  do {
+    changed = false;
+
+    for (HPostOrderIterator it(graph_); !it.Done(); it.Advance()) {
+      const HBasicBlock& block = *it.Current();
+
+      // The live_in set depends on the kill set (which does not
+      // change in this loop), and the live_out set.  If the live_out
+      // set does not change, there is no need to update the live_in set.
+      if (UpdateLiveOut(block) && UpdateLiveIn(block)) {
+        changed = true;
+      }
+    }
+  } while (changed);
+}
+
+bool SsaLivenessAnalysis::UpdateLiveOut(const HBasicBlock& block) {
+  BitVector* live_out = GetLiveOutSet(block);
+  bool changed = false;
+  // The live_out set of a block is the union of live_in sets of its successors.
+  for (size_t i = 0, e = block.GetSuccessors().Size(); i < e; ++i) {
+    HBasicBlock* successor = block.GetSuccessors().Get(i);
+    if (live_out->Union(GetLiveInSet(*successor))) {
+      changed = true;
+    }
+  }
+  return changed;
+}
+
+
+bool SsaLivenessAnalysis::UpdateLiveIn(const HBasicBlock& block) {
+  BitVector* live_out = GetLiveOutSet(block);
+  BitVector* kill = GetKillSet(block);
+  BitVector* live_in = GetLiveInSet(block);
+  // If live_out is updated (because of backward branches), we need to make
+  // sure instructions in live_out are also in live_in, unless they are killed
+  // by this block.
+  return live_in->UnionIfNotIn(live_out, kill);
+}
+
+}  // namespace art
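
The live_in/live_out iteration above is the standard backward liveness dataflow:
live_out(B) is the union of live_in over B's successors, and live_in(B) is gen(B)
united with live_out(B) minus kill(B). As a rough standalone sketch of the same fixed
point (plain std::set<int> instead of ART's ArenaBitVector, and hand-rolled structs
rather than the classes in this patch), it could look like:

    #include <set>
    #include <utility>
    #include <vector>

    struct Block {
      std::vector<int> successors;
      std::set<int> gen;   // values used before any redefinition in the block
      std::set<int> kill;  // values defined in the block
      std::set<int> live_in, live_out;
    };

    void ComputeLiveInAndLiveOut(std::vector<Block>& blocks) {
      bool changed;
      do {
        changed = false;
        for (Block& b : blocks) {  // ideally visited in post order
          std::set<int> out;
          for (int s : b.successors) {
            out.insert(blocks[s].live_in.begin(), blocks[s].live_in.end());
          }
          std::set<int> in = b.gen;
          for (int v : out) {
            if (b.kill.count(v) == 0) in.insert(v);  // live_in = gen U (live_out - kill)
          }
          if (out != b.live_out || in != b.live_in) {
            b.live_out = std::move(out);
            b.live_in = std::move(in);
            changed = true;
          }
        }
      } while (changed);
    }
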
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
new file mode 100644
index 0000000..2d91436
--- /dev/null
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_
+#define ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_
+
+#include "nodes.h"
+
+namespace art {
+
+class BlockInfo : public ArenaObject {
+ public:
+  BlockInfo(ArenaAllocator* allocator, const HBasicBlock& block, size_t number_of_ssa_values)
+      : block_(block),
+        live_in_(allocator, number_of_ssa_values, false),
+        live_out_(allocator, number_of_ssa_values, false),
+        kill_(allocator, number_of_ssa_values, false) {
+    live_in_.ClearAllBits();
+    live_out_.ClearAllBits();
+    kill_.ClearAllBits();
+  }
+
+ private:
+  const HBasicBlock& block_;
+  ArenaBitVector live_in_;
+  ArenaBitVector live_out_;
+  ArenaBitVector kill_;
+
+  friend class SsaLivenessAnalysis;
+
+  DISALLOW_COPY_AND_ASSIGN(BlockInfo);
+};
+
+/**
+ * A live range contains the start and end of a range where an instruction
+ * is live.
+ */
+class LiveRange : public ValueObject {
+ public:
+  LiveRange(size_t start, size_t end) : start_(start), end_(end) {
+    DCHECK_LT(start, end);
+  }
+
+  size_t GetStart() const { return start_; }
+  size_t GetEnd() const { return end_; }
+
+ private:
+  size_t start_;
+  size_t end_;
+};
+
+static constexpr int kDefaultNumberOfRanges = 3;
+
+/**
+ * An interval is a list of disjoint live ranges where an instruction is live.
+ * Each instruction that has uses gets an interval.
+ */
+class LiveInterval : public ArenaObject {
+ public:
+  explicit LiveInterval(ArenaAllocator* allocator) : ranges_(allocator, kDefaultNumberOfRanges) {}
+
+  void AddUse(HInstruction* instruction) {
+    size_t position = instruction->GetLifetimePosition();
+    size_t start_block_position = instruction->GetBlock()->GetLifetimeStart();
+    size_t end_block_position = instruction->GetBlock()->GetLifetimeEnd();
+    if (ranges_.IsEmpty()) {
+      // First time we see a use of that interval.
+      ranges_.Add(LiveRange(start_block_position, position));
+    } else if (ranges_.Peek().GetStart() == start_block_position) {
+      // There is a use later in the same block.
+      DCHECK_LE(position, ranges_.Peek().GetEnd());
+    } else if (ranges_.Peek().GetStart() == end_block_position + 1) {
+      // Last use is in a following block.
+      LiveRange existing = ranges_.Pop();
+      ranges_.Add(LiveRange(start_block_position, existing.GetEnd()));
+    } else {
+      // There is a hole in the interval. Create a new range.
+      ranges_.Add(LiveRange(start_block_position, position));
+    }
+  }
+
+  void AddRange(size_t start, size_t end) {
+    if (ranges_.IsEmpty()) {
+      ranges_.Add(LiveRange(start, end));
+    } else if (ranges_.Peek().GetStart() == end + 1) {
+      // There is a use in the following block.
+      LiveRange existing = ranges_.Pop();
+      ranges_.Add(LiveRange(start, existing.GetEnd()));
+    } else {
+      // There is a hole in the interval. Create a new range.
+      ranges_.Add(LiveRange(start, end));
+    }
+  }
+
+  void AddLoopRange(size_t start, size_t end) {
+    DCHECK(!ranges_.IsEmpty());
+    while (!ranges_.IsEmpty() && ranges_.Peek().GetEnd() < end) {
+      DCHECK_LE(start, ranges_.Peek().GetStart());
+      ranges_.Pop();
+    }
+    if (ranges_.IsEmpty()) {
+      // Uses are only in the loop.
+      ranges_.Add(LiveRange(start, end));
+    } else {
+      // There are uses after the loop.
+      LiveRange range = ranges_.Pop();
+      ranges_.Add(LiveRange(start, range.GetEnd()));
+    }
+  }
+
+  void SetFrom(size_t from) {
+    DCHECK(!ranges_.IsEmpty());
+    LiveRange existing = ranges_.Pop();
+    ranges_.Add(LiveRange(from, existing.GetEnd()));
+  }
+
+  const GrowableArray<LiveRange>& GetRanges() const { return ranges_; }
+
+ private:
+  GrowableArray<LiveRange> ranges_;
+
+  DISALLOW_COPY_AND_ASSIGN(LiveInterval);
+};
+
+class SsaLivenessAnalysis : public ValueObject {
+ public:
+  explicit SsaLivenessAnalysis(const HGraph& graph)
+      : graph_(graph),
+        linear_post_order_(graph.GetArena(), graph.GetBlocks().Size()),
+        block_infos_(graph.GetArena(), graph.GetBlocks().Size()),
+        instructions_from_ssa_index_(graph.GetArena(), 0),
+        number_of_ssa_values_(0) {
+    block_infos_.SetSize(graph.GetBlocks().Size());
+  }
+
+  void Analyze();
+
+  BitVector* GetLiveInSet(const HBasicBlock& block) const {
+    return &block_infos_.Get(block.GetBlockId())->live_in_;
+  }
+
+  BitVector* GetLiveOutSet(const HBasicBlock& block) const {
+    return &block_infos_.Get(block.GetBlockId())->live_out_;
+  }
+
+  BitVector* GetKillSet(const HBasicBlock& block) const {
+    return &block_infos_.Get(block.GetBlockId())->kill_;
+  }
+
+  const GrowableArray<HBasicBlock*>& GetLinearPostOrder() const {
+    return linear_post_order_;
+  }
+
+  HInstruction* GetInstructionFromSsaIndex(size_t index) {
+    return instructions_from_ssa_index_.Get(index);
+  }
+
+ private:
+  // Linearize the graph so that:
+  // (1): a block is always after its dominator,
+  // (2): blocks of loops are contiguous.
+  // This creates a natural and efficient ordering when visualizing live ranges.
+  void LinearizeGraph();
+
+  // Give an SSA number to each instruction that defines a value used by another instruction,
+  // and set up the lifetime information of each instruction and block.
+  void NumberInstructions();
+
+  // Compute live ranges of instructions, as well as live_in, live_out and kill sets.
+  void ComputeLiveness();
+
+  // Compute the live ranges of instructions, as well as the initial live_in, live_out and
+  // kill sets, which do not yet take backward branches into account.
+  void ComputeLiveRanges();
+
+  // After computing the initial sets, this method does a fixed point
+  // calculation over the live_in and live_out sets to take into account
+  // backward branches.
+  void ComputeLiveInAndLiveOutSets();
+
+  // Updates the live_in set of the block and returns whether it has changed.
+  bool UpdateLiveIn(const HBasicBlock& block);
+
+  // Updates the live_out set of the block and returns whether it has changed.
+  bool UpdateLiveOut(const HBasicBlock& block);
+
+  const HGraph& graph_;
+  GrowableArray<HBasicBlock*> linear_post_order_;
+  GrowableArray<BlockInfo*> block_infos_;
+  GrowableArray<HInstruction*> instructions_from_ssa_index_;
+  size_t number_of_ssa_values_;
+
+  DISALLOW_COPY_AND_ASSIGN(SsaLivenessAnalysis);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_
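
Because ComputeLiveRanges() walks each block backwards, a LiveInterval grows from
later positions towards earlier ones: the most recently added range is the earliest
one seen so far, and AddRange() either extends it (when the existing range starts
right after the new end, i.e. the blocks are contiguous) or opens a new range for the
hole. A simplified model of that merge-or-append step, using std::vector instead of
the arena-backed GrowableArray (illustrative only, not the class above):

    #include <cstddef>
    #include <utility>
    #include <vector>

    using Range = std::pair<size_t, size_t>;  // [start, end]

    void AddRange(std::vector<Range>& ranges, size_t start, size_t end) {
      if (!ranges.empty() && ranges.back().first == end + 1) {
        ranges.back().first = start;     // contiguous with the last added range: extend it
      } else {
        ranges.push_back({start, end});  // first range, or a hole in the interval
      }
    }

    // Visiting blocks in reverse linear order with lifetimes [30,40], [20,29], [5,15]:
    //   AddRange(r, 30, 40); AddRange(r, 20, 29);  -> one merged range [20, 40]
    //   AddRange(r, 5, 15);                        -> a second range [5, 15]
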
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 7c3633b..d104619 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -28,9 +28,9 @@
 
 namespace art {
 
-class StringPrettyPrinter : public HPrettyPrinter {
+class SsaPrettyPrinter : public HPrettyPrinter {
  public:
-  explicit StringPrettyPrinter(HGraph* graph) : HPrettyPrinter(graph), str_("") {}
+  explicit SsaPrettyPrinter(HGraph* graph) : HPrettyPrinter(graph), str_("") {}
 
   virtual void PrintInt(int value) {
     str_ += StringPrintf("%d", value);
@@ -59,17 +59,17 @@
  private:
   std::string str_;
 
-  DISALLOW_COPY_AND_ASSIGN(StringPrettyPrinter);
+  DISALLOW_COPY_AND_ASSIGN(SsaPrettyPrinter);
 };
 
 static void ReNumberInstructions(HGraph* graph) {
   int id = 0;
-  for (size_t i = 0; i < graph->GetBlocks()->Size(); i++) {
-    HBasicBlock* block = graph->GetBlocks()->Get(i);
-    for (HInstructionIterator it(*block->GetPhis()); !it.Done(); it.Advance()) {
+  for (size_t i = 0, e = graph->GetBlocks().Size(); i < e; ++i) {
+    HBasicBlock* block = graph->GetBlocks().Get(i);
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
       it.Current()->SetId(id++);
     }
-    for (HInstructionIterator it(*block->GetInstructions()); !it.Done(); it.Advance()) {
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       it.Current()->SetId(id++);
     }
   }
@@ -82,11 +82,12 @@
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   HGraph* graph = builder.BuildGraph(*item);
   ASSERT_NE(graph, nullptr);
+
   graph->BuildDominatorTree();
   graph->TransformToSSA();
   ReNumberInstructions(graph);
 
-  StringPrettyPrinter printer(graph);
+  SsaPrettyPrinter printer(graph);
   printer.VisitInsertionOrder();
 
   ASSERT_STREQ(expected, printer.str().c_str());
@@ -98,15 +99,18 @@
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [2, 2]\n"
     "  1: Goto\n"
-    "BasicBlock 1, pred: 0, succ: 3, 2\n"
+    "BasicBlock 1, pred: 0, succ: 2, 5\n"
     "  2: Equal(0, 0) [3]\n"
     "  3: If(2)\n"
     "BasicBlock 2, pred: 1, succ: 3\n"
     "  4: Goto\n"
-    "BasicBlock 3, pred: 1, 2, succ: 4\n"
+    "BasicBlock 3, pred: 2, 5, succ: 4\n"
     "  5: ReturnVoid\n"
     "BasicBlock 4, pred: 3\n"
-    "  6: Exit\n";
+    "  6: Exit\n"
+    // Synthesized block to avoid critical edge.
+    "BasicBlock 5, pred: 1, succ: 3\n"
+    "  7: Goto\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -125,16 +129,19 @@
     "  0: IntConstant 0 [6, 3, 3]\n"
     "  1: IntConstant 4 [6]\n"
     "  2: Goto\n"
-    "BasicBlock 1, pred: 0, succ: 3, 2\n"
+    "BasicBlock 1, pred: 0, succ: 2, 5\n"
     "  3: Equal(0, 0) [4]\n"
     "  4: If(3)\n"
     "BasicBlock 2, pred: 1, succ: 3\n"
     "  5: Goto\n"
-    "BasicBlock 3, pred: 1, 2, succ: 4\n"
-    "  6: Phi(0, 1) [7]\n"
+    "BasicBlock 3, pred: 2, 5, succ: 4\n"
+    "  6: Phi(1, 0) [7]\n"
     "  7: Return(6)\n"
     "BasicBlock 4, pred: 3\n"
-    "  8: Exit\n";
+    "  8: Exit\n"
+    // Synthesized block to avoid critical edge.
+    "BasicBlock 5, pred: 1, succ: 3\n"
+    "  9: Goto\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -147,7 +154,7 @@
 
 TEST(SsaTest, CFG3) {
   // Test that we create a phi for the join block of an if control flow instruction
-  // when there both branches update a local.
+  // when both branches update a local.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [4, 4]\n"
@@ -184,16 +191,21 @@
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [6, 4, 2, 2]\n"
     "  1: Goto\n"
-    "BasicBlock 1, pred: 0, succ: 3, 2\n"
+    "BasicBlock 1, pred: 0, succ: 5, 6\n"
     "  2: Equal(0, 0) [3]\n"
     "  3: If(2)\n"
-    "BasicBlock 2, pred: 1, 3, succ: 3\n"
-    "  4: Phi(0, 6) [6]\n"
+    "BasicBlock 2, pred: 3, 6, succ: 3\n"
+    "  4: Phi(6, 0) [6]\n"
     "  5: Goto\n"
-    "BasicBlock 3, pred: 1, 2, succ: 2\n"
-    "  6: Phi(0, 4) [4]\n"
+    "BasicBlock 3, pred: 2, 5, succ: 2\n"
+    "  6: Phi(4, 0) [4]\n"
     "  7: Goto\n"
-    "BasicBlock 4\n";
+    "BasicBlock 4\n"
+    // Synthesized blocks to avoid critical edge.
+    "BasicBlock 5, pred: 1, succ: 3\n"
+    "  8: Goto\n"
+    "BasicBlock 6, pred: 1, succ: 2\n"
+    "  9: Goto\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -349,26 +361,30 @@
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [5]\n"
-    "  1: IntConstant 4 [5, 8, 8]\n"
-    "  2: IntConstant 5 [5]\n"
+    "  1: IntConstant 4 [14, 8, 8]\n"
+    "  2: IntConstant 5 [14]\n"
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 2\n"
     "  4: Goto\n"
-    "BasicBlock 2, pred: 1, 4, 5, succ: 6, 3\n"
-    "  5: Phi(0, 2, 1) [12, 6, 6]\n"
+    "BasicBlock 2, pred: 1, 8, succ: 6, 3\n"
+    "  5: Phi(0, 14) [12, 6, 6]\n"
     "  6: Equal(5, 5) [7]\n"
     "  7: If(6)\n"
     "BasicBlock 3, pred: 2, succ: 5, 4\n"
     "  8: Equal(1, 1) [9]\n"
     "  9: If(8)\n"
-    "BasicBlock 4, pred: 3, succ: 2\n"
+    "BasicBlock 4, pred: 3, succ: 8\n"
     "  10: Goto\n"
-    "BasicBlock 5, pred: 3, succ: 2\n"
+    "BasicBlock 5, pred: 3, succ: 8\n"
     "  11: Goto\n"
     "BasicBlock 6, pred: 2, succ: 7\n"
     "  12: Return(5)\n"
     "BasicBlock 7, pred: 6\n"
-    "  13: Exit\n";
+    "  13: Exit\n"
+    // Synthesized single back edge of loop.
+    "BasicBlock 8, pred: 5, 4, succ: 2\n"
+    "  14: Phi(1, 2) [5]\n"
+    "  15: Goto\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -393,7 +409,7 @@
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 2\n"
     "  4: Goto\n"
-    "BasicBlock 2, pred: 1, 5, succ: 6, 3\n"
+    "BasicBlock 2, pred: 1, 5, succ: 3, 8\n"
     "  5: Phi(0, 1) [12, 6, 6]\n"
     "  6: Equal(5, 5) [7]\n"
     "  7: If(6)\n"
@@ -404,11 +420,13 @@
     "  10: Goto\n"
     "BasicBlock 5, pred: 3, succ: 2\n"
     "  11: Goto\n"
-    "BasicBlock 6, pred: 2, 4, succ: 7\n"
-    "  12: Phi(5, 2) [13]\n"
+    "BasicBlock 6, pred: 4, 8, succ: 7\n"
+    "  12: Phi(2, 5) [13]\n"
     "  13: Return(12)\n"
     "BasicBlock 7, pred: 6\n"
-    "  14: Exit\n";
+    "  14: Exit\n"
+    "BasicBlock 8, pred: 2, succ: 6\n"
+    "  15: Goto\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
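
The new "BasicBlock 5"/"BasicBlock 6"/"BasicBlock 8" lines in the expected output come
from the SSA builder splitting critical edges and synthesizing a single back edge: an
edge is critical when its source has several successors and its target has several
predecessors, and the inserted block (containing only a Goto, or a Phi plus a Goto for
the merged back edge) gives later phases a place to put per-edge moves. A minimal
sketch of the condition, assuming HBasicBlock exposes a GetPredecessors() list
analogous to the GetSuccessors() list used elsewhere in this patch (illustrative, not
the exact helper the builder uses):

    #include "nodes.h"

    namespace art {

    static bool IsCriticalEdge(HBasicBlock* from, HBasicBlock* to) {
      return from->GetSuccessors().Size() > 1u && to->GetPredecessors().Size() > 1u;
    }

    }  // namespace art
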
diff --git a/compiler/output_stream.h b/compiler/output_stream.h
index 478a854..97ccc2c 100644
--- a/compiler/output_stream.h
+++ b/compiler/output_stream.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OUTPUT_STREAM_H_
 
 #include <stdint.h>
+#include <sys/types.h>
 
 #include <string>
 
diff --git a/compiler/output_stream_test.cc b/compiler/output_stream_test.cc
index 290bf25..5fa0ccb 100644
--- a/compiler/output_stream_test.cc
+++ b/compiler/output_stream_test.cc
@@ -64,7 +64,7 @@
   FileOutputStream output_stream(tmp.GetFile());
   SetOutputStream(output_stream);
   GenerateTestOutput();
-  UniquePtr<File> in(OS::OpenFileForReading(tmp.GetFilename().c_str()));
+  std::unique_ptr<File> in(OS::OpenFileForReading(tmp.GetFilename().c_str()));
   EXPECT_TRUE(in.get() != NULL);
   std::vector<uint8_t> actual(in->GetLength());
   bool readSuccess = in->ReadFully(&actual[0], actual.size());
@@ -74,12 +74,12 @@
 
 TEST_F(OutputStreamTest, Buffered) {
   ScratchFile tmp;
-  UniquePtr<FileOutputStream> file_output_stream(new FileOutputStream(tmp.GetFile()));
+  std::unique_ptr<FileOutputStream> file_output_stream(new FileOutputStream(tmp.GetFile()));
   CHECK(file_output_stream.get() != NULL);
   BufferedOutputStream buffered_output_stream(file_output_stream.release());
   SetOutputStream(buffered_output_stream);
   GenerateTestOutput();
-  UniquePtr<File> in(OS::OpenFileForReading(tmp.GetFilename().c_str()));
+  std::unique_ptr<File> in(OS::OpenFileForReading(tmp.GetFilename().c_str()));
   EXPECT_TRUE(in.get() != NULL);
   std::vector<uint8_t> actual(in->GetLength());
   bool readSuccess = in->ReadFully(&actual[0], actual.size());
diff --git a/compiler/sea_ir/debug/dot_gen.h b/compiler/sea_ir/debug/dot_gen.h
index d7d21ad..a5d6819 100644
--- a/compiler/sea_ir/debug/dot_gen.h
+++ b/compiler/sea_ir/debug/dot_gen.h
@@ -104,7 +104,7 @@
     LOG(INFO) << "Starting to write SEA string to file " << filename << std::endl;
     DotGenerationVisitor dgv = DotGenerationVisitor(&options_, types);
     graph->Accept(&dgv);
-    // TODO: UniquePtr to close file properly. Switch to BufferedOutputStream.
+    // TODO: std::unique_ptr to close file properly. Switch to BufferedOutputStream.
     art::File* file = art::OS::CreateEmptyFile(filename.c_str());
     art::FileOutputStream fos(file);
     std::string graph_as_string = dgv.GetResult();
diff --git a/compiler/sea_ir/ir/sea.cc b/compiler/sea_ir/ir/sea.cc
index 0734b21..2b25f56 100644
--- a/compiler/sea_ir/ir/sea.cc
+++ b/compiler/sea_ir/ir/sea.cc
@@ -289,7 +289,7 @@
 void SeaGraph::ConvertToSSA() {
   // Pass: find global names.
   // The map @block maps registers to the blocks in which they are defined.
-  std::map<int, std::set<Region*> > blocks;
+  std::map<int, std::set<Region*>> blocks;
   // The set @globals records registers whose use
   // is in a different block than the corresponding definition.
   std::set<int> globals;
@@ -311,7 +311,7 @@
         var_kill.insert(reg_def);
       }
 
-      blocks.insert(std::pair<int, std::set<Region*> >(reg_def, std::set<Region*>()));
+      blocks.insert(std::pair<int, std::set<Region*>>(reg_def, std::set<Region*>()));
       std::set<Region*>* reg_def_blocks = &(blocks.find(reg_def)->second);
       reg_def_blocks->insert(*region_it);
     }
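
The whitespace-only changes in this hunk rely on C++11 parsing: consecutive closing
angle brackets now terminate nested template argument lists, whereas C++98 lexed ">>"
there as the right-shift operator and required a space. A trivial sketch (the Region
type is just a placeholder, not the Sea IR class):

    #include <map>
    #include <set>

    struct Region;

    // C++98 spelling:        std::map<int, std::set<Region*> > blocks;
    // C++11 and later allow: std::map<int, std::set<Region*>> blocks;
    std::map<int, std::set<Region*>> blocks;
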
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index fb909a8..24378b4 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -30,7 +30,7 @@
 namespace arm {
 static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
                                                     ThreadOffset<4> offset) {
-  UniquePtr<ArmAssembler> assembler(static_cast<ArmAssembler*>(Assembler::Create(kArm)));
+  std::unique_ptr<ArmAssembler> assembler(static_cast<ArmAssembler*>(Assembler::Create(kArm)));
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (R0) in interpreter ABI.
@@ -47,7 +47,7 @@
   __ bkpt(0);
 
   size_t cs = assembler->CodeSize();
-  UniquePtr<std::vector<uint8_t> > entry_stub(new std::vector<uint8_t>(cs));
+  std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
   MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
   assembler->FinalizeInstructions(code);
 
@@ -58,22 +58,19 @@
 namespace arm64 {
 static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
                                                     ThreadOffset<8> offset) {
-  UniquePtr<Arm64Assembler> assembler(static_cast<Arm64Assembler*>(Assembler::Create(kArm64)));
+  std::unique_ptr<Arm64Assembler> assembler(static_cast<Arm64Assembler*>(Assembler::Create(kArm64)));
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (X0) in interpreter ABI.
-      // FIXME IPx used by VIXL - this is unsafe.
       __ JumpTo(Arm64ManagedRegister::FromCoreRegister(X0), Offset(offset.Int32Value()),
           Arm64ManagedRegister::FromCoreRegister(IP1));
 
       break;
     case kJniAbi:  // Load via Thread* held in JNIEnv* in first argument (X0).
-
       __ LoadRawPtr(Arm64ManagedRegister::FromCoreRegister(IP1),
                       Arm64ManagedRegister::FromCoreRegister(X0),
                       Offset(JNIEnvExt::SelfOffset().Int32Value()));
 
-      // FIXME IPx used by VIXL - this is unsafe.
       __ JumpTo(Arm64ManagedRegister::FromCoreRegister(IP1), Offset(offset.Int32Value()),
                 Arm64ManagedRegister::FromCoreRegister(IP0));
 
@@ -87,7 +84,7 @@
   }
 
   size_t cs = assembler->CodeSize();
-  UniquePtr<std::vector<uint8_t> > entry_stub(new std::vector<uint8_t>(cs));
+  std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
   MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
   assembler->FinalizeInstructions(code);
 
@@ -98,7 +95,7 @@
 namespace mips {
 static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
                                                     ThreadOffset<4> offset) {
-  UniquePtr<MipsAssembler> assembler(static_cast<MipsAssembler*>(Assembler::Create(kMips)));
+  std::unique_ptr<MipsAssembler> assembler(static_cast<MipsAssembler*>(Assembler::Create(kMips)));
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (A0) in interpreter ABI.
@@ -117,7 +114,7 @@
   __ Break();
 
   size_t cs = assembler->CodeSize();
-  UniquePtr<std::vector<uint8_t> > entry_stub(new std::vector<uint8_t>(cs));
+  std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
   MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
   assembler->FinalizeInstructions(code);
 
@@ -127,14 +124,14 @@
 
 namespace x86 {
 static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<4> offset) {
-  UniquePtr<X86Assembler> assembler(static_cast<X86Assembler*>(Assembler::Create(kX86)));
+  std::unique_ptr<X86Assembler> assembler(static_cast<X86Assembler*>(Assembler::Create(kX86)));
 
   // All x86 trampolines call via the Thread* held in fs.
   __ fs()->jmp(Address::Absolute(offset));
   __ int3();
 
   size_t cs = assembler->CodeSize();
-  UniquePtr<std::vector<uint8_t> > entry_stub(new std::vector<uint8_t>(cs));
+  std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
   MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
   assembler->FinalizeInstructions(code);
 
@@ -144,7 +141,7 @@
 
 namespace x86_64 {
 static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<8> offset) {
-  UniquePtr<x86_64::X86_64Assembler>
+  std::unique_ptr<x86_64::X86_64Assembler>
       assembler(static_cast<x86_64::X86_64Assembler*>(Assembler::Create(kX86_64)));
 
   // All x86 trampolines call via the Thread* held in gs.
@@ -152,7 +149,7 @@
   __ int3();
 
   size_t cs = assembler->CodeSize();
-  UniquePtr<std::vector<uint8_t> > entry_stub(new std::vector<uint8_t>(cs));
+  std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
   MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
   assembler->FinalizeInstructions(code);
 
diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h
index 18a5bce..032eabc 100644
--- a/compiler/utils/arena_allocator.h
+++ b/compiler/utils/arena_allocator.h
@@ -23,6 +23,7 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "mem_map.h"
+#include "utils.h"
 
 namespace art {
 
@@ -155,7 +156,7 @@
     if (UNLIKELY(running_on_valgrind_)) {
       return AllocValgrind(bytes, kind);
     }
-    bytes = (bytes + 3) & ~3;
+    bytes = RoundUp(bytes, 4);
     if (UNLIKELY(ptr_ + bytes > end_)) {
       // Obtain a new block.
       ObtainNewArenaForAllocation(bytes);
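
RoundUp() (pulled in via the new utils.h include) expresses the same power-of-two
alignment as the old bit twiddling: for a power-of-two n, rounding x up to a multiple
of n is (x + n - 1) & ~(n - 1). A small self-contained sketch of that identity (not
the ART helper itself):

    #include <cstddef>

    // Round x up to the next multiple of n, where n is a power of two.
    constexpr size_t RoundUpPow2(size_t x, size_t n) {
      return (x + n - 1) & ~(n - 1);
    }

    static_assert(RoundUpPow2(5, 4) == 8, "5 rounds up to 8");
    static_assert(RoundUpPow2(8, 4) == 8, "multiples are unchanged");
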
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 5c839dd..64685c1 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -1752,53 +1752,53 @@
 #endif
 }
 
-void ArmAssembler::CreateSirtEntry(ManagedRegister mout_reg,
-                                   FrameOffset sirt_offset,
+void ArmAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
+                                   FrameOffset handle_scope_offset,
                                    ManagedRegister min_reg, bool null_allowed) {
   ArmManagedRegister out_reg = mout_reg.AsArm();
   ArmManagedRegister in_reg = min_reg.AsArm();
   CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister()) << in_reg;
   CHECK(out_reg.IsCoreRegister()) << out_reg;
   if (null_allowed) {
-    // Null values get a SIRT entry value of 0.  Otherwise, the SIRT entry is
-    // the address in the SIRT holding the reference.
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
     // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
     if (in_reg.IsNoRegister()) {
       LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(),
-                     SP, sirt_offset.Int32Value());
+                     SP, handle_scope_offset.Int32Value());
       in_reg = out_reg;
     }
     cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
     if (!out_reg.Equals(in_reg)) {
       LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);
     }
-    AddConstant(out_reg.AsCoreRegister(), SP, sirt_offset.Int32Value(), NE);
+    AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
   } else {
-    AddConstant(out_reg.AsCoreRegister(), SP, sirt_offset.Int32Value(), AL);
+    AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL);
   }
 }
 
-void ArmAssembler::CreateSirtEntry(FrameOffset out_off,
-                                   FrameOffset sirt_offset,
+void ArmAssembler::CreateHandleScopeEntry(FrameOffset out_off,
+                                   FrameOffset handle_scope_offset,
                                    ManagedRegister mscratch,
                                    bool null_allowed) {
   ArmManagedRegister scratch = mscratch.AsArm();
   CHECK(scratch.IsCoreRegister()) << scratch;
   if (null_allowed) {
     LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP,
-                   sirt_offset.Int32Value());
-    // Null values get a SIRT entry value of 0.  Otherwise, the sirt entry is
-    // the address in the SIRT holding the reference.
-    // e.g. scratch = (scratch == 0) ? 0 : (SP+sirt_offset)
+                   handle_scope_offset.Int32Value());
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
+    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
     cmp(scratch.AsCoreRegister(), ShifterOperand(0));
-    AddConstant(scratch.AsCoreRegister(), SP, sirt_offset.Int32Value(), NE);
+    AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
   } else {
-    AddConstant(scratch.AsCoreRegister(), SP, sirt_offset.Int32Value(), AL);
+    AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL);
   }
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, out_off.Int32Value());
 }
 
-void ArmAssembler::LoadReferenceFromSirt(ManagedRegister mout_reg,
+void ArmAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
                                          ManagedRegister min_reg) {
   ArmManagedRegister out_reg = mout_reg.AsArm();
   ArmManagedRegister in_reg = min_reg.AsArm();
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index f5be04a..396e603 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -521,20 +521,20 @@
   void GetCurrentThread(ManagedRegister tr) OVERRIDE;
   void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
 
-  // Set up out_reg to hold a Object** into the SIRT, or to be NULL if the
+  // Set up out_reg to hold an Object** into the handle scope, or to be NULL if the
   // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the SIRT entry to see if the value is
+  // that can be used to avoid loading the handle scope entry to see if the value is
   // NULL.
-  void CreateSirtEntry(ManagedRegister out_reg, FrameOffset sirt_offset, ManagedRegister in_reg,
+  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, ManagedRegister in_reg,
                        bool null_allowed) OVERRIDE;
 
-  // Set up out_off to hold a Object** into the SIRT, or to be NULL if the
+  // Set up out_off to hold an Object** into the handle scope, or to be NULL if the
   // value is null and null_allowed.
-  void CreateSirtEntry(FrameOffset out_off, FrameOffset sirt_offset, ManagedRegister scratch,
+  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister scratch,
                        bool null_allowed) OVERRIDE;
 
-  // src holds a SIRT entry (Object**) load this into dst
-  void LoadReferenceFromSirt(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+  // src holds a handle scope entry (Object**); load this into dst
+  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
 
   // Heap::VerifyObject on src. In some cases (such as a reference to this) we
   // know that src may not be null.
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index b4bb979..27188b2 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -50,11 +50,11 @@
 }
 
 void Arm64Assembler::GetCurrentThread(ManagedRegister tr) {
-  ___ Mov(reg_x(tr.AsArm64().AsCoreRegister()), reg_x(TR1));
+  ___ Mov(reg_x(tr.AsArm64().AsCoreRegister()), reg_x(ETR));
 }
 
 void Arm64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) {
-  StoreToOffset(TR1, SP, offset.Int32Value());
+  StoreToOffset(ETR, SP, offset.Int32Value());
 }
 
 // See Arm64 PCS Section 5.2.2.1.
@@ -79,11 +79,13 @@
     // VIXL macro-assembler handles all variants.
     ___ Add(reg_x(rd), reg_x(rn), value);
   } else {
-    // ip1 = rd + value
-    // rd = cond ? ip1 : rn
-    CHECK_NE(rn, IP1);
-    ___ Add(reg_x(IP1), reg_x(rn), value);
-    ___ Csel(reg_x(rd), reg_x(IP1), reg_x(rd), COND_OP(cond));
+    // temp = rd + value
+    // rd = cond ? temp : rn
+    vixl::UseScratchRegisterScope temps(vixl_masm_);
+    temps.Exclude(reg_x(rd), reg_x(rn));
+    vixl::Register temp = temps.AcquireX();
+    ___ Add(temp, reg_x(rn), value);
+    ___ Csel(reg_x(rd), temp, reg_x(rd), COND_OP(cond));
   }
 }
 
@@ -162,7 +164,7 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadImmediate(scratch.AsCoreRegister(), imm);
-  StoreToOffset(scratch.AsCoreRegister(), TR1, offs.Int32Value());
+  StoreToOffset(scratch.AsCoreRegister(), ETR, offs.Int32Value());
 }
 
 void Arm64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> tr_offs,
@@ -171,13 +173,14 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsCoreRegister()) << scratch;
   AddConstant(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsCoreRegister(), TR1, tr_offs.Int32Value());
+  StoreToOffset(scratch.AsCoreRegister(), ETR, tr_offs.Int32Value());
 }
 
 void Arm64Assembler::StoreStackPointerToThread64(ThreadOffset<8> tr_offs) {
-  // Arm64 does not support: "str sp, [dest]" therefore we use IP1 as a temp reg.
-  ___ Mov(reg_x(IP1), reg_x(SP));
-  StoreToOffset(IP1, TR1, tr_offs.Int32Value());
+  vixl::UseScratchRegisterScope temps(vixl_masm_);
+  vixl::Register temp = temps.AcquireX();
+  ___ Mov(temp, reg_x(SP));
+  ___ Str(temp, MEM_OP(reg_x(ETR), tr_offs.Int32Value()));
 }
 
 void Arm64Assembler::StoreSpanning(FrameOffset dest_off, ManagedRegister m_source,
@@ -195,12 +198,14 @@
   if ((cond == AL) || (cond == NV)) {
     ___ Mov(reg_x(dest), value);
   } else {
-    // ip1 = value
-    // rd = cond ? ip1 : rd
+    // temp = value
+    // rd = cond ? temp : rd
     if (value != 0) {
-      CHECK_NE(dest, IP1);
-      ___ Mov(reg_x(IP1), value);
-      ___ Csel(reg_x(dest), reg_x(IP1), reg_x(dest), COND_OP(cond));
+      vixl::UseScratchRegisterScope temps(vixl_masm_);
+      temps.Exclude(reg_x(dest));
+      vixl::Register temp = temps.AcquireX();
+      ___ Mov(temp, value);
+      ___ Csel(reg_x(dest), temp, reg_x(dest), COND_OP(cond));
     } else {
       ___ Csel(reg_x(dest), reg_x(XZR), reg_x(dest), COND_OP(cond));
     }
@@ -276,7 +281,7 @@
 }
 
 void Arm64Assembler::LoadFromThread64(ManagedRegister m_dst, ThreadOffset<8> src, size_t size) {
-  return Load(m_dst.AsArm64(), TR1, src.Int32Value(), size);
+  return Load(m_dst.AsArm64(), ETR, src.Int32Value(), size);
 }
 
 void Arm64Assembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) {
@@ -298,13 +303,16 @@
   Arm64ManagedRegister dst = m_dst.AsArm64();
   Arm64ManagedRegister base = m_base.AsArm64();
   CHECK(dst.IsCoreRegister() && base.IsCoreRegister());
-  LoadFromOffset(dst.AsCoreRegister(), base.AsCoreRegister(), offs.Int32Value());
+  // Remove dst and base from the temp list - higher level API uses IP1, IP0.
+  vixl::UseScratchRegisterScope temps(vixl_masm_);
+  temps.Exclude(reg_x(dst.AsCoreRegister()), reg_x(base.AsCoreRegister()));
+  ___ Ldr(reg_x(dst.AsCoreRegister()), MEM_OP(reg_x(base.AsCoreRegister()), offs.Int32Value()));
 }
 
 void Arm64Assembler::LoadRawPtrFromThread64(ManagedRegister m_dst, ThreadOffset<8> offs) {
   Arm64ManagedRegister dst = m_dst.AsArm64();
   CHECK(dst.IsCoreRegister()) << dst;
-  LoadFromOffset(dst.AsCoreRegister(), TR1, offs.Int32Value());
+  LoadFromOffset(dst.AsCoreRegister(), ETR, offs.Int32Value());
 }
 
 // Copying routines.
@@ -342,7 +350,7 @@
                                           ManagedRegister m_scratch) {
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadFromOffset(scratch.AsCoreRegister(), TR1, tr_offs.Int32Value());
+  LoadFromOffset(scratch.AsCoreRegister(), ETR, tr_offs.Int32Value());
   StoreToOffset(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
 }
 
@@ -352,7 +360,7 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsCoreRegister(), TR1, tr_offs.Int32Value());
+  StoreToOffset(scratch.AsCoreRegister(), ETR, tr_offs.Int32Value());
 }
 
 void Arm64Assembler::CopyRef(FrameOffset dest, FrameOffset src,
@@ -511,7 +519,10 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(base.IsCoreRegister()) << base;
   CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadFromOffset(scratch.AsCoreRegister(), base.AsCoreRegister(), offs.Int32Value());
+  // Remove base and scratch from the temp list - higher level API uses IP1, IP0.
+  vixl::UseScratchRegisterScope temps(vixl_masm_);
+  temps.Exclude(reg_x(base.AsCoreRegister()), reg_x(scratch.AsCoreRegister()));
+  ___ Ldr(reg_x(scratch.AsCoreRegister()), MEM_OP(reg_x(base.AsCoreRegister()), offs.Int32Value()));
   ___ Br(reg_x(scratch.AsCoreRegister()));
 }
 
@@ -528,52 +539,52 @@
   UNIMPLEMENTED(FATAL) << "Unimplemented Call() variant";
 }
 
-void Arm64Assembler::CreateSirtEntry(ManagedRegister m_out_reg, FrameOffset sirt_offs,
+void Arm64Assembler::CreateHandleScopeEntry(ManagedRegister m_out_reg, FrameOffset handle_scope_offs,
                                      ManagedRegister m_in_reg, bool null_allowed) {
   Arm64ManagedRegister out_reg = m_out_reg.AsArm64();
   Arm64ManagedRegister in_reg = m_in_reg.AsArm64();
-  // For now we only hold stale sirt entries in x registers.
+  // For now we only hold stale handle scope entries in x registers.
   CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister()) << in_reg;
   CHECK(out_reg.IsCoreRegister()) << out_reg;
   if (null_allowed) {
-    // Null values get a SIRT entry value of 0.  Otherwise, the SIRT entry is
-    // the address in the SIRT holding the reference.
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
     // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
     if (in_reg.IsNoRegister()) {
       LoadWFromOffset(kLoadWord, out_reg.AsOverlappingCoreRegisterLow(), SP,
-                      sirt_offs.Int32Value());
+                      handle_scope_offs.Int32Value());
       in_reg = out_reg;
     }
     ___ Cmp(reg_w(in_reg.AsOverlappingCoreRegisterLow()), 0);
     if (!out_reg.Equals(in_reg)) {
       LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);
     }
-    AddConstant(out_reg.AsCoreRegister(), SP, sirt_offs.Int32Value(), NE);
+    AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offs.Int32Value(), NE);
   } else {
-    AddConstant(out_reg.AsCoreRegister(), SP, sirt_offs.Int32Value(), AL);
+    AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offs.Int32Value(), AL);
   }
 }
 
-void Arm64Assembler::CreateSirtEntry(FrameOffset out_off, FrameOffset sirt_offset,
+void Arm64Assembler::CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handle_scope_offset,
                                      ManagedRegister m_scratch, bool null_allowed) {
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsCoreRegister()) << scratch;
   if (null_allowed) {
     LoadWFromOffset(kLoadWord, scratch.AsOverlappingCoreRegisterLow(), SP,
-                    sirt_offset.Int32Value());
-    // Null values get a SIRT entry value of 0.  Otherwise, the sirt entry is
-    // the address in the SIRT holding the reference.
-    // e.g. scratch = (scratch == 0) ? 0 : (SP+sirt_offset)
+                    handle_scope_offset.Int32Value());
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
+    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
     ___ Cmp(reg_w(scratch.AsOverlappingCoreRegisterLow()), 0);
     // Move this logic in add constants with flags.
-    AddConstant(scratch.AsCoreRegister(), SP, sirt_offset.Int32Value(), NE);
+    AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
   } else {
-    AddConstant(scratch.AsCoreRegister(), SP, sirt_offset.Int32Value(), AL);
+    AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL);
   }
   StoreToOffset(scratch.AsCoreRegister(), SP, out_off.Int32Value());
 }
 
-void Arm64Assembler::LoadReferenceFromSirt(ManagedRegister m_out_reg,
+void Arm64Assembler::LoadReferenceFromHandleScope(ManagedRegister m_out_reg,
                                            ManagedRegister m_in_reg) {
   Arm64ManagedRegister out_reg = m_out_reg.AsArm64();
   Arm64ManagedRegister in_reg = m_in_reg.AsArm64();
@@ -595,13 +606,17 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   Arm64Exception *current_exception = new Arm64Exception(scratch, stack_adjust);
   exception_blocks_.push_back(current_exception);
-  LoadFromOffset(scratch.AsCoreRegister(), TR1, Thread::ExceptionOffset<8>().Int32Value());
+  LoadFromOffset(scratch.AsCoreRegister(), ETR, Thread::ExceptionOffset<8>().Int32Value());
   ___ Cmp(reg_x(scratch.AsCoreRegister()), 0);
   ___ B(current_exception->Entry(), COND_OP(NE));
 }
 
 void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) {
-    // Bind exception poll entry.
+  vixl::UseScratchRegisterScope temps(vixl_masm_);
+  temps.Exclude(reg_x(exception->scratch_.AsCoreRegister()));
+  vixl::Register temp = temps.AcquireX();
+
+  // Bind exception poll entry.
   ___ Bind(exception->Entry());
   if (exception->stack_adjust_ != 0) {  // Fix up the frame.
     DecreaseFrameSize(exception->stack_adjust_);
@@ -609,12 +624,14 @@
   // Pass exception object as argument.
   // Don't care about preserving X0 as this won't return.
   ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsCoreRegister()));
-  LoadFromOffset(IP1, TR1, QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value());
+  ___ Ldr(temp, MEM_OP(reg_x(ETR), QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()));
 
-  // FIXME: Temporary fix for TR (XSELF).
-  ___ Mov(reg_x(TR), reg_x(TR1));
+  // Move ETR(Callee saved) back to TR(Caller saved) reg. We use ETR on calls
+  // to external functions that might trash TR. We do not need the original
+  // X19 saved in BuildFrame().
+  ___ Mov(reg_x(TR), reg_x(ETR));
 
-  ___ Blr(reg_x(IP1));
+  ___ Blr(temp);
   // Call should never return.
   ___ Brk();
 }
@@ -634,8 +651,10 @@
   CHECK_EQ(callee_save_regs.size(), kCalleeSavedRegsSize);
   ___ PushCalleeSavedRegisters();
 
-  // FIXME: Temporary fix for TR (XSELF).
-  ___ Mov(reg_x(TR1), reg_x(TR));
+  // Move TR(Caller saved) to ETR(Callee saved). The original X19 has been
+  // saved by PushCalleeSavedRegisters(). This way we make sure that TR is not
+  // trashed by native code.
+  ___ Mov(reg_x(ETR), reg_x(TR));
 
   // Increase frame to required size - must be at least space to push Method*.
   CHECK_GT(frame_size, kCalleeSavedRegsSize * kFramePointerSize);
@@ -681,8 +700,10 @@
   size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize);
   DecreaseFrameSize(adjust);
 
-  // FIXME: Temporary fix for TR (XSELF).
-  ___ Mov(reg_x(TR), reg_x(TR1));
+  // We move ETR (Callee Saved) back to TR (Caller Saved) which might have
+  // been trashed in the native call. The original X19 (ETR) is restored as
+  // part of PopCalleeSavedRegisters().
+  ___ Mov(reg_x(TR), reg_x(ETR));
 
   // Pop callee saved and return to LR.
   ___ PopCalleeSavedRegisters();
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 97fb93a..ab4999a 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -17,8 +17,9 @@
 #ifndef ART_COMPILER_UTILS_ARM64_ASSEMBLER_ARM64_H_
 #define ART_COMPILER_UTILS_ARM64_ASSEMBLER_ARM64_H_
 
-#include <vector>
 #include <stdint.h>
+#include <memory>
+#include <vector>
 
 #include "base/logging.h"
 #include "constants_arm64.h"
@@ -26,7 +27,6 @@
 #include "utils/assembler.h"
 #include "offsets.h"
 #include "utils.h"
-#include "UniquePtr.h"
 #include "a64/macro-assembler-a64.h"
 #include "a64/disasm-a64.h"
 
@@ -85,6 +85,7 @@
   vixl_masm_(new vixl::MacroAssembler(vixl_buf_, kBufferSizeArm64)) {}
 
   virtual ~Arm64Assembler() {
+    delete vixl_masm_;
     delete[] vixl_buf_;
   }
 
@@ -160,20 +161,20 @@
   void GetCurrentThread(ManagedRegister tr) OVERRIDE;
   void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
 
-  // Set up out_reg to hold a Object** into the SIRT, or to be NULL if the
+  // Set up out_reg to hold an Object** into the handle scope, or to be NULL if the
   // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the SIRT entry to see if the value is
+  // that can be used to avoid loading the handle scope entry to see if the value is
   // NULL.
-  void CreateSirtEntry(ManagedRegister out_reg, FrameOffset sirt_offset,
+  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
                        ManagedRegister in_reg, bool null_allowed) OVERRIDE;
 
+  // Set up out_off to hold an Object** into the handle scope, or to be NULL if the
+  // Set up out_off to hold a Object** into the handle scope, or to be NULL if the
   // value is null and null_allowed.
-  void CreateSirtEntry(FrameOffset out_off, FrameOffset sirt_offset,
+  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
                        ManagedRegister scratch, bool null_allowed) OVERRIDE;
 
-  // src holds a SIRT entry (Object**) load this into dst.
-  void LoadReferenceFromSirt(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+  // src holds a handle scope entry (Object**); load this into dst.
+  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
 
   // Heap::VerifyObject on src. In some cases (such as a reference to this) we
   // know that src may not be null.
@@ -237,8 +238,8 @@
   // Vixl buffer.
   byte* vixl_buf_;
 
-  // Unique ptr - vixl assembler.
-  UniquePtr<vixl::MacroAssembler> vixl_masm_;
+  // Vixl assembler.
+  vixl::MacroAssembler* vixl_masm_;
 
   // List of exception blocks to generate at the end of the code cache.
   std::vector<Arm64Exception*> exception_blocks_;
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 219c87f..19239e1 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -453,20 +453,20 @@
   virtual void GetCurrentThread(FrameOffset dest_offset,
                                 ManagedRegister scratch) = 0;
 
-  // Set up out_reg to hold a Object** into the SIRT, or to be NULL if the
+  // Set up out_reg to hold an Object** into the handle scope, or to be NULL if the
   // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the SIRT entry to see if the value is
+  // that can be used to avoid loading the handle scope entry to see if the value is
   // NULL.
-  virtual void CreateSirtEntry(ManagedRegister out_reg, FrameOffset sirt_offset,
+  virtual void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
                                ManagedRegister in_reg, bool null_allowed) = 0;
 
-  // Set up out_off to hold a Object** into the SIRT, or to be NULL if the
+  // Set up out_off to hold an Object** into the handle scope, or to be NULL if the
   // value is null and null_allowed.
-  virtual void CreateSirtEntry(FrameOffset out_off, FrameOffset sirt_offset,
+  virtual void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
                                ManagedRegister scratch, bool null_allowed) = 0;
 
-  // src holds a SIRT entry (Object**) load this into dst
-  virtual void LoadReferenceFromSirt(ManagedRegister dst,
+  // src holds a handle scope entry (Object**); load this into dst
+  virtual void LoadReferenceFromHandleScope(ManagedRegister dst,
                                      ManagedRegister src) = 0;
 
   // Heap::VerifyObject on src. In some cases (such as a reference to this) we
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index ce1c4de..754496b 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -19,7 +19,7 @@
 
 #include "assembler.h"
 
-#include "gtest/gtest.h"
+#include "common_runtime_test.h"  // For ScratchFile
 
 #include <cstdio>
 #include <cstdlib>
@@ -30,6 +30,10 @@
 
 namespace art {
 
+// Use a global static variable to keep the same name for all test data. Otherwise we'll
+// just spam the temp directory.
+static std::string tmpnam_;
+
 template<typename Ass, typename Reg, typename Imm>
 class AssemblerTest : public testing::Test {
  public:
@@ -203,6 +207,10 @@
   void SetUp() OVERRIDE {
     assembler_.reset(new Ass());
 
+    // Fake a runtime test for ScratchFile
+    std::string android_data;
+    CommonRuntimeTest::SetEnvironmentVariables(android_data);
+
     SetUpHelpers();
   }
 
@@ -339,7 +347,7 @@
     }
 
     size_t cs = assembler_->CodeSize();
-    UniquePtr<std::vector<uint8_t> > data(new std::vector<uint8_t>(cs));
+    std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
     MemoryRegion code(&(*data)[0], data->size());
     assembler_->FinalizeInstructions(code);
 
@@ -367,7 +375,7 @@
     bool ok;
     std::string error_msg;
     std::string base_name;
-    UniquePtr<std::vector<uint8_t>> code;
+    std::unique_ptr<std::vector<uint8_t>> code;
     uintptr_t length;
   };
 
@@ -667,17 +675,17 @@
   // Use a consistent tmpnam, so store it.
   std::string GetTmpnam() {
     if (tmpnam_.length() == 0) {
-      tmpnam_ = std::string(tmpnam(nullptr));
+      ScratchFile tmp;
+      tmpnam_ = tmp.GetFilename() + "asm";
     }
     return tmpnam_;
   }
 
-  UniquePtr<Ass> assembler_;
+  std::unique_ptr<Ass> assembler_;
 
   std::string resolved_assembler_cmd_;
   std::string resolved_objdump_cmd_;
   std::string resolved_disassemble_cmd_;
-  std::string tmpnam_;
 
   static constexpr size_t OBJDUMP_SECTION_LINE_MIN_TOKENS = 6;
 };
diff --git a/compiler/utils/debug_stack.h b/compiler/utils/debug_stack.h
index 2e02b43..1bb0624 100644
--- a/compiler/utils/debug_stack.h
+++ b/compiler/utils/debug_stack.h
@@ -118,7 +118,7 @@
     CheckTop();
   }
   DebugStackIndirectTopRefImpl& operator=(const DebugStackIndirectTopRefImpl& other) {
-    CHECK(ref_ == other->ref_);
+    CHECK(ref_ == other.ref_);
     CheckTop();
     return *this;
   }
diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h
index 7cc253c..4c52174 100644
--- a/compiler/utils/dedupe_set.h
+++ b/compiler/utils/dedupe_set.h
@@ -77,7 +77,7 @@
 
  private:
   std::string lock_name_[kShard];
-  UniquePtr<Mutex> lock_[kShard];
+  std::unique_ptr<Mutex> lock_[kShard];
   std::set<HashedKey, Comparator> keys_[kShard];
 
   DISALLOW_COPY_AND_ASSIGN(DedupeSet);
diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h
index 659b4f7..e703d8e 100644
--- a/compiler/utils/growable_array.h
+++ b/compiler/utils/growable_array.h
@@ -78,7 +78,7 @@
 
       private:
         size_t idx_;
-        GrowableArray* const g_list_;
+        GrowableArray* g_list_;
     };
 
     GrowableArray(ArenaAllocator* arena, size_t init_length, OatListKind kind = kGrowableArrayMisc)
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 9001f8a..8001dcd 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -827,8 +827,8 @@
   UNIMPLEMENTED(FATAL) << "no mips implementation";
 }
 
-void MipsAssembler::CreateSirtEntry(ManagedRegister mout_reg,
-                                    FrameOffset sirt_offset,
+void MipsAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
+                                    FrameOffset handle_scope_offset,
                                     ManagedRegister min_reg, bool null_allowed) {
   MipsManagedRegister out_reg = mout_reg.AsMips();
   MipsManagedRegister in_reg = min_reg.AsMips();
@@ -836,27 +836,27 @@
   CHECK(out_reg.IsCoreRegister()) << out_reg;
   if (null_allowed) {
     Label null_arg;
-    // Null values get a SIRT entry value of 0.  Otherwise, the SIRT entry is
-    // the address in the SIRT holding the reference.
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
     // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
     if (in_reg.IsNoRegister()) {
       LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(),
-                     SP, sirt_offset.Int32Value());
+                     SP, handle_scope_offset.Int32Value());
       in_reg = out_reg;
     }
     if (!out_reg.Equals(in_reg)) {
       LoadImmediate(out_reg.AsCoreRegister(), 0);
     }
     EmitBranch(in_reg.AsCoreRegister(), ZERO, &null_arg, true);
-    AddConstant(out_reg.AsCoreRegister(), SP, sirt_offset.Int32Value());
+    AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
     Bind(&null_arg, false);
   } else {
-    AddConstant(out_reg.AsCoreRegister(), SP, sirt_offset.Int32Value());
+    AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
   }
 }
 
-void MipsAssembler::CreateSirtEntry(FrameOffset out_off,
-                                    FrameOffset sirt_offset,
+void MipsAssembler::CreateHandleScopeEntry(FrameOffset out_off,
+                                    FrameOffset handle_scope_offset,
                                     ManagedRegister mscratch,
                                     bool null_allowed) {
   MipsManagedRegister scratch = mscratch.AsMips();
@@ -864,21 +864,21 @@
   if (null_allowed) {
     Label null_arg;
     LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP,
-                   sirt_offset.Int32Value());
-    // Null values get a SIRT entry value of 0.  Otherwise, the sirt entry is
-    // the address in the SIRT holding the reference.
-    // e.g. scratch = (scratch == 0) ? 0 : (SP+sirt_offset)
+                   handle_scope_offset.Int32Value());
+    // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
+    // the address in the handle scope holding the reference.
+    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
     EmitBranch(scratch.AsCoreRegister(), ZERO, &null_arg, true);
-    AddConstant(scratch.AsCoreRegister(), SP, sirt_offset.Int32Value());
+    AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
     Bind(&null_arg, false);
   } else {
-    AddConstant(scratch.AsCoreRegister(), SP, sirt_offset.Int32Value());
+    AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
   }
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, out_off.Int32Value());
 }
 
-// Given a SIRT entry, load the associated reference.
-void MipsAssembler::LoadReferenceFromSirt(ManagedRegister mout_reg,
+// Given a handle scope entry, load the associated reference.
+void MipsAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
                                           ManagedRegister min_reg) {
   MipsManagedRegister out_reg = mout_reg.AsMips();
   MipsManagedRegister in_reg = min_reg.AsMips();
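
The null_allowed branch here (and in the x86 and x86_64 assemblers further below) computes the same value; a C-level sketch of what the emitted code produces, for illustration only and not the actual emitted MIPS instructions:

    #include <cstddef>

    // out = (*(SP + handle_scope_offset) == null) ? null : SP + handle_scope_offset
    void* HandleScopeEntryAddress(char* sp, size_t handle_scope_offset, bool null_allowed) {
      char* entry = sp + handle_scope_offset;
      if (null_allowed && *reinterpret_cast<void**>(entry) == nullptr) {
        return nullptr;  // null references get a handle scope entry value of 0
      }
      return entry;      // otherwise the address in the handle scope holding the reference
    }
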
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 75ee8b9..216cb41 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -238,20 +238,20 @@
   void GetCurrentThread(ManagedRegister tr) OVERRIDE;
   void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE;
 
-  // Set up out_reg to hold a Object** into the SIRT, or to be NULL if the
+  // Set up out_reg to hold a Object** into the handle scope, or to be NULL if the
   // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the SIRT entry to see if the value is
+  // that can be used to avoid loading the handle scope entry to see if the value is
   // NULL.
-  void CreateSirtEntry(ManagedRegister out_reg, FrameOffset sirt_offset, ManagedRegister in_reg,
+  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, ManagedRegister in_reg,
                        bool null_allowed) OVERRIDE;
 
-  // Set up out_off to hold a Object** into the SIRT, or to be NULL if the
+  // Set up out_off to hold a Object** into the handle scope, or to be NULL if the
   // value is null and null_allowed.
-  void CreateSirtEntry(FrameOffset out_off, FrameOffset sirt_offset, ManagedRegister mscratch,
+  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister mscratch,
                        bool null_allowed) OVERRIDE;
 
-  // src holds a SIRT entry (Object**) load this into dst
-  void LoadReferenceFromSirt(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+  // src holds a handle scope entry (Object**) load this into dst
+  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
 
   // Heap::VerifyObject on src. In some cases (such as a reference to this) we
   // know that src may not be null.
diff --git a/compiler/utils/scoped_arena_allocator.cc b/compiler/utils/scoped_arena_allocator.cc
index bd78eae..b8b0e6e 100644
--- a/compiler/utils/scoped_arena_allocator.cc
+++ b/compiler/utils/scoped_arena_allocator.cc
@@ -92,7 +92,7 @@
 }
 
 void* ArenaStack::AllocValgrind(size_t bytes, ArenaAllocKind kind) {
-  size_t rounded_bytes = (bytes + kValgrindRedZoneBytes + 3) & ~3;
+  size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 4);
   uint8_t* ptr = top_ptr_;
   if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
     ptr = AllocateFromNextArena(rounded_bytes);
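
RoundUp(bytes, 4) computes the same result as the previous (bytes + 3) & ~3 mask for any power-of-two alignment; a self-contained check of that equivalence, using a generic helper written for the example and assuming RoundUp has the usual definition:

    #include <cassert>
    #include <cstddef>

    // Round n up to the next multiple of a power-of-two alignment.
    constexpr size_t RoundUpPow2(size_t n, size_t alignment) {
      return (n + alignment - 1) & ~(alignment - 1);
    }

    int main() {
      for (size_t bytes = 0; bytes < 64; ++bytes) {
        assert(RoundUpPow2(bytes, 4) == ((bytes + 3) & ~static_cast<size_t>(3)));
      }
      return 0;
    }
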
diff --git a/compiler/utils/scoped_arena_allocator.h b/compiler/utils/scoped_arena_allocator.h
index 28e86ec..c090062 100644
--- a/compiler/utils/scoped_arena_allocator.h
+++ b/compiler/utils/scoped_arena_allocator.h
@@ -67,7 +67,7 @@
     if (UNLIKELY(running_on_valgrind_)) {
       return AllocValgrind(bytes, kind);
     }
-    size_t rounded_bytes = (bytes + 3) & ~3;
+    size_t rounded_bytes = RoundUp(bytes, 4);
     uint8_t* ptr = top_ptr_;
     if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
       ptr = AllocateFromNextArena(rounded_bytes);
@@ -235,8 +235,24 @@
 
   template <typename U>
   friend class ScopedArenaAllocatorAdapter;
+
+  template <typename U>
+  friend bool operator==(const ScopedArenaAllocatorAdapter<U>& lhs,
+                         const ScopedArenaAllocatorAdapter<U>& rhs);
 };
 
+template <typename T>
+inline bool operator==(const ScopedArenaAllocatorAdapter<T>& lhs,
+                       const ScopedArenaAllocatorAdapter<T>& rhs) {
+  return lhs.arena_stack_ == rhs.arena_stack_;
+}
+
+template <typename T>
+inline bool operator!=(const ScopedArenaAllocatorAdapter<T>& lhs,
+                       const ScopedArenaAllocatorAdapter<T>& rhs) {
+  return !(lhs == rhs);
+}
+
 inline ScopedArenaAllocatorAdapter<void> ScopedArenaAllocator::Adapter() {
   return ScopedArenaAllocatorAdapter<void>(this);
 }
diff --git a/compiler/utils/scoped_arena_containers.h b/compiler/utils/scoped_arena_containers.h
new file mode 100644
index 0000000..5deb661
--- /dev/null
+++ b/compiler/utils/scoped_arena_containers.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_SCOPED_ARENA_CONTAINERS_H_
+#define ART_COMPILER_UTILS_SCOPED_ARENA_CONTAINERS_H_
+
+#include <vector>
+#include <set>
+
+#include "utils/scoped_arena_allocator.h"
+#include "safe_map.h"
+
+namespace art {
+
+template <typename T>
+using ScopedArenaVector = std::vector<T, ScopedArenaAllocatorAdapter<T>>;
+
+template <typename T, typename Comparator = std::less<T>>
+using ScopedArenaSet = std::set<T, Comparator, ScopedArenaAllocatorAdapter<T>>;
+
+template <typename K, typename V, typename Comparator = std::less<K>>
+using ScopedArenaSafeMap =
+    SafeMap<K, V, Comparator, ScopedArenaAllocatorAdapter<std::pair<const K, V>>>;
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_SCOPED_ARENA_CONTAINERS_H_
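
The aliases above plug the arena adapter into the standard containers; a hedged usage sketch (the ArenaPool/ArenaStack construction and the conversion from Adapter() are assumptions based on the headers above, not code from this patch):

    ArenaPool pool;
    ArenaStack arena_stack(&pool);
    ScopedArenaAllocator allocator(&arena_stack);

    // Memory for the container elements comes from the arena stack and is released in bulk.
    ScopedArenaVector<int> values(allocator.Adapter());
    values.push_back(42);

    // The operator== added to ScopedArenaAllocatorAdapter makes two adapters over the same
    // ArenaStack compare equal, which standard containers rely on when deciding whether
    // storage obtained through one allocator instance may be freed through another.
    ScopedArenaSet<int> seen(std::less<int>(), allocator.Adapter());
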
diff --git a/compiler/utils/scoped_hashtable.h b/compiler/utils/scoped_hashtable.h
index ccec7ba..bf8dd1f 100644
--- a/compiler/utils/scoped_hashtable.h
+++ b/compiler/utils/scoped_hashtable.h
@@ -36,7 +36,7 @@
   // Lookups entry K starting from the current (topmost) scope
   // and returns its value if found or NULL.
   V Lookup(K k) const {
-    for (typename std::list<std::map<K, V> >::const_iterator scopes_it = scopes.begin();
+    for (typename std::list<std::map<K, V>>::const_iterator scopes_it = scopes.begin();
         scopes_it != scopes.end(); scopes_it++) {
       typename std::map<K, V>::const_iterator result_it = (*scopes_it).find(k);
       if (result_it != (*scopes_it).end()) {
@@ -64,7 +64,7 @@
   }
 
  private:
-  std::list<std::map<K, V> > scopes;
+  std::list<std::map<K, V>> scopes;
 };
 }  // namespace utils
 
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 6a3efc5..0791c63 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1727,8 +1727,8 @@
 #endif
 }
 
-void X86Assembler::CreateSirtEntry(ManagedRegister mout_reg,
-                                   FrameOffset sirt_offset,
+void X86Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
+                                   FrameOffset handle_scope_offset,
                                    ManagedRegister min_reg, bool null_allowed) {
   X86ManagedRegister out_reg = mout_reg.AsX86();
   X86ManagedRegister in_reg = min_reg.AsX86();
@@ -1742,34 +1742,34 @@
     }
     testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
     j(kZero, &null_arg);
-    leal(out_reg.AsCpuRegister(), Address(ESP, sirt_offset));
+    leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset));
     Bind(&null_arg);
   } else {
-    leal(out_reg.AsCpuRegister(), Address(ESP, sirt_offset));
+    leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset));
   }
 }
 
-void X86Assembler::CreateSirtEntry(FrameOffset out_off,
-                                   FrameOffset sirt_offset,
+void X86Assembler::CreateHandleScopeEntry(FrameOffset out_off,
+                                   FrameOffset handle_scope_offset,
                                    ManagedRegister mscratch,
                                    bool null_allowed) {
   X86ManagedRegister scratch = mscratch.AsX86();
   CHECK(scratch.IsCpuRegister());
   if (null_allowed) {
     Label null_arg;
-    movl(scratch.AsCpuRegister(), Address(ESP, sirt_offset));
+    movl(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
     testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
     j(kZero, &null_arg);
-    leal(scratch.AsCpuRegister(), Address(ESP, sirt_offset));
+    leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
     Bind(&null_arg);
   } else {
-    leal(scratch.AsCpuRegister(), Address(ESP, sirt_offset));
+    leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
   }
   Store(out_off, scratch, 4);
 }
 
-// Given a SIRT entry, load the associated reference.
-void X86Assembler::LoadReferenceFromSirt(ManagedRegister mout_reg,
+// Given a handle scope entry, load the associated reference.
+void X86Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
                                          ManagedRegister min_reg) {
   X86ManagedRegister out_reg = mout_reg.AsX86();
   X86ManagedRegister in_reg = min_reg.AsX86();
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 057c80a..2fc6049 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -541,20 +541,20 @@
   void GetCurrentThread(ManagedRegister tr) OVERRIDE;
   void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
 
-  // Set up out_reg to hold a Object** into the SIRT, or to be NULL if the
+  // Set up out_reg to hold a Object** into the handle scope, or to be NULL if the
   // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the SIRT entry to see if the value is
+  // that can be used to avoid loading the handle scope entry to see if the value is
   // NULL.
-  void CreateSirtEntry(ManagedRegister out_reg, FrameOffset sirt_offset, ManagedRegister in_reg,
+  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, ManagedRegister in_reg,
                        bool null_allowed) OVERRIDE;
 
-  // Set up out_off to hold a Object** into the SIRT, or to be NULL if the
+  // Set up out_off to hold a Object** into the handle scope, or to be NULL if the
   // value is null and null_allowed.
-  void CreateSirtEntry(FrameOffset out_off, FrameOffset sirt_offset, ManagedRegister scratch,
+  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister scratch,
                        bool null_allowed) OVERRIDE;
 
-  // src holds a SIRT entry (Object**) load this into dst
-  void LoadReferenceFromSirt(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+  // src holds a handle scope entry (Object**) load this into dst
+  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
 
   // Heap::VerifyObject on src. In some cases (such as a reference to this) we
   // know that src may not be null.
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 8eaeae1..0ede875 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1989,8 +1989,8 @@
 #endif
 }
 
-void X86_64Assembler::CreateSirtEntry(ManagedRegister mout_reg,
-                                   FrameOffset sirt_offset,
+void X86_64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
+                                   FrameOffset handle_scope_offset,
                                    ManagedRegister min_reg, bool null_allowed) {
   X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
   X86_64ManagedRegister in_reg = min_reg.AsX86_64();
@@ -1998,7 +1998,7 @@
     // Use out_reg as indicator of NULL
     in_reg = out_reg;
     // TODO: movzwl
-    movl(in_reg.AsCpuRegister(), Address(CpuRegister(RSP), sirt_offset));
+    movl(in_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
   }
   CHECK(in_reg.IsCpuRegister());
   CHECK(out_reg.IsCpuRegister());
@@ -2010,34 +2010,34 @@
     }
     testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
     j(kZero, &null_arg);
-    leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), sirt_offset));
+    leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
     Bind(&null_arg);
   } else {
-    leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), sirt_offset));
+    leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
   }
 }
 
-void X86_64Assembler::CreateSirtEntry(FrameOffset out_off,
-                                   FrameOffset sirt_offset,
+void X86_64Assembler::CreateHandleScopeEntry(FrameOffset out_off,
+                                   FrameOffset handle_scope_offset,
                                    ManagedRegister mscratch,
                                    bool null_allowed) {
   X86_64ManagedRegister scratch = mscratch.AsX86_64();
   CHECK(scratch.IsCpuRegister());
   if (null_allowed) {
     Label null_arg;
-    movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), sirt_offset));
+    movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
     testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
     j(kZero, &null_arg);
-    leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), sirt_offset));
+    leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
     Bind(&null_arg);
   } else {
-    leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), sirt_offset));
+    leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
   }
   Store(out_off, scratch, 8);
 }
 
-// Given a SIRT entry, load the associated reference.
-void X86_64Assembler::LoadReferenceFromSirt(ManagedRegister mout_reg,
+// Given a handle scope entry, load the associated reference.
+void X86_64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
                                          ManagedRegister min_reg) {
   X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
   X86_64ManagedRegister in_reg = min_reg.AsX86_64();
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 87fb359..548d379 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -566,20 +566,20 @@
   void GetCurrentThread(ManagedRegister tr) OVERRIDE;
   void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
 
-  // Set up out_reg to hold a Object** into the SIRT, or to be NULL if the
+  // Set up out_reg to hold a Object** into the handle scope, or to be NULL if the
   // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the SIRT entry to see if the value is
+  // that can be used to avoid loading the handle scope entry to see if the value is
   // NULL.
-  void CreateSirtEntry(ManagedRegister out_reg, FrameOffset sirt_offset, ManagedRegister in_reg,
+  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, ManagedRegister in_reg,
                        bool null_allowed) OVERRIDE;
 
-  // Set up out_off to hold a Object** into the SIRT, or to be NULL if the
+  // Set up out_off to hold a Object** into the handle scope, or to be NULL if the
   // value is null and null_allowed.
-  void CreateSirtEntry(FrameOffset out_off, FrameOffset sirt_offset, ManagedRegister scratch,
+  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister scratch,
                        bool null_allowed) OVERRIDE;
 
-  // src holds a SIRT entry (Object**) load this into dst
-  virtual void LoadReferenceFromSirt(ManagedRegister dst,
+  // src holds a handle scope entry (Object**) load this into dst
+  virtual void LoadReferenceFromHandleScope(ManagedRegister dst,
                                      ManagedRegister src);
 
   // Heap::VerifyObject on src. In some cases (such as a reference to this) we
diff --git a/dalvikvm/Android.mk b/dalvikvm/Android.mk
index e99c76f..03d32f0 100644
--- a/dalvikvm/Android.mk
+++ b/dalvikvm/Android.mk
@@ -16,7 +16,7 @@
 
 LOCAL_PATH := $(call my-dir)
 
-dalvikvm_cflags := -Wall -Werror -Wextra
+dalvikvm_cflags := -Wall -Werror -Wextra -std=gnu++11
 
 include $(CLEAR_VARS)
 LOCAL_MODULE := dalvikvm
@@ -24,27 +24,34 @@
 LOCAL_CPP_EXTENSION := cc
 LOCAL_SRC_FILES := dalvikvm.cc
 LOCAL_CFLAGS := $(dalvikvm_cflags)
+LOCAL_C_INCLUDES := art/runtime
 LOCAL_SHARED_LIBRARIES := libdl libnativehelper
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 LOCAL_MULTILIB := both
-LOCAL_MODULE_STEM_32 := dalvikvm
+LOCAL_MODULE_STEM_32 := dalvikvm32
 LOCAL_MODULE_STEM_64 := dalvikvm64
-include art/build/Android.libcxx.mk
+include external/libcxx/libcxx.mk
 include $(BUILD_EXECUTABLE)
+
+# create symlink for the primary version target.
+include  $(BUILD_SYSTEM)/executable_prefer_symlink.mk
+
 ART_TARGET_EXECUTABLES += $(TARGET_OUT_EXECUTABLES)/$(LOCAL_MODULE)
 
 ifeq ($(WITH_HOST_DALVIK),true)
 include $(CLEAR_VARS)
 LOCAL_MODULE := dalvikvm
 LOCAL_MODULE_TAGS := optional
+LOCAL_CLANG := true
 LOCAL_CPP_EXTENSION := cc
 LOCAL_SRC_FILES := dalvikvm.cc
 LOCAL_CFLAGS := $(dalvikvm_cflags)
+LOCAL_C_INCLUDES := art/runtime
 LOCAL_SHARED_LIBRARIES := libnativehelper
 LOCAL_LDFLAGS := -ldl -lpthread
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 LOCAL_IS_HOST_MODULE := true
-include art/build/Android.libcxx.mk
+include external/libcxx/libcxx.mk
 include $(BUILD_HOST_EXECUTABLE)
 ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)
 endif
diff --git a/dalvikvm/dalvikvm.cc b/dalvikvm/dalvikvm.cc
index 8d71a7c..67794c8 100644
--- a/dalvikvm/dalvikvm.cc
+++ b/dalvikvm/dalvikvm.cc
@@ -16,15 +16,15 @@
 
 #include <signal.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
-
 #include <algorithm>
+#include <memory>
 
 #include "jni.h"
 #include "JniInvocation.h"
 #include "ScopedLocalRef.h"
 #include "toStringArray.h"
-#include "UniquePtr.h"
 
 namespace art {
 
@@ -117,7 +117,7 @@
   // We're over-allocating, because this includes the options to the runtime
   // plus the options to the program.
   int option_count = argc;
-  UniquePtr<JavaVMOption[]> options(new JavaVMOption[option_count]());
+  std::unique_ptr<JavaVMOption[]> options(new JavaVMOption[option_count]());
 
   // Copy options over.  Everything up to the name of the class starts
   // with a '-' (the function hook stuff is strictly internal).
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index cdf26f1..f0b5750 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -33,7 +33,7 @@
 #include "compiler.h"
 #include "compiler_callbacks.h"
 #include "dex_file-inl.h"
-#include "dex/pass_driver.h"
+#include "dex/pass_driver_me.h"
 #include "dex/verification_results.h"
 #include "driver/compiler_callbacks_impl.h"
 #include "driver/compiler_driver.h"
@@ -228,7 +228,7 @@
       SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_) {
     CHECK(verification_results != nullptr);
     CHECK(method_inliner_map != nullptr);
-    UniquePtr<Dex2Oat> dex2oat(new Dex2Oat(&compiler_options,
+    std::unique_ptr<Dex2Oat> dex2oat(new Dex2Oat(&compiler_options,
                                            compiler_kind,
                                            instruction_set,
                                            instruction_set_features,
@@ -236,7 +236,7 @@
                                            method_inliner_map,
                                            thread_count));
     if (!dex2oat->CreateRuntime(runtime_options, instruction_set)) {
-      *p_dex2oat = NULL;
+      *p_dex2oat = nullptr;
       return false;
     }
     *p_dex2oat = dex2oat.release();
@@ -256,19 +256,19 @@
 
   // Reads the class names (java.lang.Object) and returns a set of descriptors (Ljava/lang/Object;)
   CompilerDriver::DescriptorSet* ReadImageClassesFromFile(const char* image_classes_filename) {
-    UniquePtr<std::ifstream> image_classes_file(new std::ifstream(image_classes_filename,
+    std::unique_ptr<std::ifstream> image_classes_file(new std::ifstream(image_classes_filename,
                                                                   std::ifstream::in));
-    if (image_classes_file.get() == NULL) {
+    if (image_classes_file.get() == nullptr) {
       LOG(ERROR) << "Failed to open image classes file " << image_classes_filename;
-      return NULL;
+      return nullptr;
     }
-    UniquePtr<CompilerDriver::DescriptorSet> result(ReadImageClasses(*image_classes_file.get()));
+    std::unique_ptr<CompilerDriver::DescriptorSet> result(ReadImageClasses(*image_classes_file.get()));
     image_classes_file->close();
     return result.release();
   }
 
   CompilerDriver::DescriptorSet* ReadImageClasses(std::istream& image_classes_stream) {
-    UniquePtr<CompilerDriver::DescriptorSet> image_classes(new CompilerDriver::DescriptorSet);
+    std::unique_ptr<CompilerDriver::DescriptorSet> image_classes(new CompilerDriver::DescriptorSet);
     while (image_classes_stream.good()) {
       std::string dot;
       std::getline(image_classes_stream, dot);
@@ -285,22 +285,22 @@
   CompilerDriver::DescriptorSet* ReadImageClassesFromZip(const char* zip_filename,
                                                          const char* image_classes_filename,
                                                          std::string* error_msg) {
-    UniquePtr<ZipArchive> zip_archive(ZipArchive::Open(zip_filename, error_msg));
-    if (zip_archive.get() == NULL) {
-      return NULL;
+    std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(zip_filename, error_msg));
+    if (zip_archive.get() == nullptr) {
+      return nullptr;
     }
-    UniquePtr<ZipEntry> zip_entry(zip_archive->Find(image_classes_filename, error_msg));
-    if (zip_entry.get() == NULL) {
+    std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(image_classes_filename, error_msg));
+    if (zip_entry.get() == nullptr) {
       *error_msg = StringPrintf("Failed to find '%s' within '%s': %s", image_classes_filename,
                                 zip_filename, error_msg->c_str());
-      return NULL;
+      return nullptr;
     }
-    UniquePtr<MemMap> image_classes_file(zip_entry->ExtractToMemMap(image_classes_filename,
+    std::unique_ptr<MemMap> image_classes_file(zip_entry->ExtractToMemMap(image_classes_filename,
                                                                     error_msg));
-    if (image_classes_file.get() == NULL) {
+    if (image_classes_file.get() == nullptr) {
       *error_msg = StringPrintf("Failed to extract '%s' from '%s': %s", image_classes_filename,
                                 zip_filename, error_msg->c_str());
-      return NULL;
+      return nullptr;
     }
     const std::string image_classes_string(reinterpret_cast<char*>(image_classes_file->Begin()),
                                            image_classes_file->Size());
@@ -315,14 +315,14 @@
                                       File* oat_file,
                                       const std::string& bitcode_filename,
                                       bool image,
-                                      UniquePtr<CompilerDriver::DescriptorSet>& image_classes,
+                                      std::unique_ptr<CompilerDriver::DescriptorSet>& image_classes,
                                       bool dump_stats,
                                       bool dump_passes,
                                       TimingLogger& timings,
                                       CumulativeLogger& compiler_phases_timings,
                                       std::string profile_file) {
-    // SirtRef and ClassLoader creation needs to come after Runtime::Create
-    jobject class_loader = NULL;
+    // Handle and ClassLoader creation needs to come after Runtime::Create
+    jobject class_loader = nullptr;
     Thread* self = Thread::Current();
     if (!boot_image_option.empty()) {
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -339,7 +339,7 @@
       Runtime::Current()->SetCompileTimeClassPath(class_loader, class_path_files);
     }
 
-    UniquePtr<CompilerDriver> driver(new CompilerDriver(compiler_options_,
+    std::unique_ptr<CompilerDriver> driver(new CompilerDriver(compiler_options_,
                                                         verification_results_,
                                                         method_inliner_map_,
                                                         compiler_kind_,
@@ -380,7 +380,7 @@
     TimingLogger::ScopedSplit split("Writing ELF", &timings);
     if (!driver->WriteElf(android_root, is_host, dex_files, &oat_writer, oat_file)) {
       LOG(ERROR) << "Failed to write ELF file " << oat_file->GetPath();
-      return NULL;
+      return nullptr;
     }
 
     return driver.release();
@@ -403,8 +403,8 @@
       oat_data_begin = image_writer.GetOatDataBegin();
     }
 
-    UniquePtr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str()));
-    if (oat_file.get() == NULL) {
+    std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str()));
+    if (oat_file.get() == nullptr) {
       PLOG(ERROR) << "Failed to open ELF file: " << oat_filename;
       return false;
     }
@@ -444,10 +444,11 @@
       return false;
     }
     Runtime* runtime = Runtime::Current();
+    runtime->SetInstructionSet(instruction_set);
     for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
       Runtime::CalleeSaveType type = Runtime::CalleeSaveType(i);
       if (!runtime->HasCalleeSaveMethod(type)) {
-        runtime->SetCalleeSaveMethod(runtime->CreateCalleeSaveMethod(instruction_set, type), type);
+        runtime->SetCalleeSaveMethod(runtime->CreateCalleeSaveMethod(type), type);
       }
     }
     runtime->GetClassLinker()->FixupDexCaches(runtime->GetResolutionMethod());
@@ -469,7 +470,7 @@
       }
       std::string error_msg;
       const DexFile* dex_file = DexFile::Open(parsed[i].c_str(), parsed[i].c_str(), &error_msg);
-      if (dex_file == NULL) {
+      if (dex_file == nullptr) {
         LOG(WARNING) << "Failed to open dex file '" << parsed[i] << "': " << error_msg;
       } else {
         dex_files.push_back(dex_file);
@@ -527,7 +528,7 @@
       continue;
     }
     const DexFile* dex_file = DexFile::Open(dex_filename, dex_location, &error_msg);
-    if (dex_file == NULL) {
+    if (dex_file == nullptr) {
       LOG(WARNING) << "Failed to open .dex from file '" << dex_filename << "': " << error_msg;
       ++failure_count;
     } else {
@@ -564,8 +565,8 @@
     }
     shutting_down_ = false;
     const char* reason = "dex2oat watch dog thread startup";
-    CHECK_WATCH_DOG_PTHREAD_CALL(pthread_mutex_init, (&mutex_, NULL), reason);
-    CHECK_WATCH_DOG_PTHREAD_CALL(pthread_cond_init, (&cond_, NULL), reason);
+    CHECK_WATCH_DOG_PTHREAD_CALL(pthread_mutex_init, (&mutex_, nullptr), reason);
+    CHECK_WATCH_DOG_PTHREAD_CALL(pthread_cond_init, (&cond_, nullptr), reason);
     CHECK_WATCH_DOG_PTHREAD_CALL(pthread_attr_init, (&attr_), reason);
     CHECK_WATCH_DOG_PTHREAD_CALL(pthread_create, (&pthread_, &attr_, &CallBack, this), reason);
     CHECK_WATCH_DOG_PTHREAD_CALL(pthread_attr_destroy, (&attr_), reason);
@@ -580,7 +581,7 @@
     CHECK_WATCH_DOG_PTHREAD_CALL(pthread_cond_signal, (&cond_), reason);
     CHECK_WATCH_DOG_PTHREAD_CALL(pthread_mutex_unlock, (&mutex_), reason);
 
-    CHECK_WATCH_DOG_PTHREAD_CALL(pthread_join, (pthread_, NULL), reason);
+    CHECK_WATCH_DOG_PTHREAD_CALL(pthread_join, (pthread_, nullptr), reason);
 
     CHECK_WATCH_DOG_PTHREAD_CALL(pthread_cond_destroy, (&cond_), reason);
     CHECK_WATCH_DOG_PTHREAD_CALL(pthread_mutex_destroy, (&mutex_), reason);
@@ -591,7 +592,7 @@
     WatchDog* self = reinterpret_cast<WatchDog*>(arg);
     ::art::SetThreadName("dex2oat watch dog");
     self->Wait();
-    return NULL;
+    return nullptr;
   }
 
   static void Message(char severity, const std::string& message) {
@@ -687,6 +688,12 @@
     } else if (feature == "nodiv") {
       // Turn off support for divide instruction.
       result.SetHasDivideInstruction(false);
+    } else if (feature == "lpae") {
+      // Supports Large Physical Address Extension.
+      result.SetHasLpae(true);
+    } else if (feature == "nolpae") {
+      // Turn off support for Large Physical Address Extension.
+      result.SetHasLpae(false);
     } else {
       Usage("Unknown instruction set feature: '%s'", feature.c_str());
     }
@@ -721,8 +728,8 @@
   std::string oat_location;
   int oat_fd = -1;
   std::string bitcode_filename;
-  const char* image_classes_zip_filename = NULL;
-  const char* image_classes_filename = NULL;
+  const char* image_classes_zip_filename = nullptr;
+  const char* image_classes_filename = nullptr;
   std::string image_filename;
   std::string boot_image_filename;
   uintptr_t image_base = 0;
@@ -732,7 +739,7 @@
   Compiler::Kind compiler_kind = kUsePortableCompiler
       ? Compiler::kPortable
       : Compiler::kQuick;
-  const char* compiler_filter_string = NULL;
+  const char* compiler_filter_string = nullptr;
   int huge_method_threshold = CompilerOptions::kDefaultHugeMethodThreshold;
   int large_method_threshold = CompilerOptions::kDefaultLargeMethodThreshold;
   int small_method_threshold = CompilerOptions::kDefaultSmallMethodThreshold;
@@ -758,7 +765,7 @@
 
   for (int i = 0; i < argc; i++) {
     const StringPiece option(argv[i]);
-    bool log_options = false;
+    const bool log_options = false;
     if (log_options) {
       LOG(INFO) << "dex2oat: option[" << i << "]=" << argv[i];
     }
@@ -911,10 +918,10 @@
     } else if (option == "--no-profile-file") {
       // No profile
     } else if (option == "--print-pass-names") {
-      PassDriver::PrintPassNames();
+      PassDriverME::PrintPassNames();
     } else if (option.starts_with("--disable-passes=")) {
       std::string disable_passes = option.substr(strlen("--disable-passes=")).data();
-      PassDriver::CreateDefaultPassList(disable_passes);
+      PassDriverME::CreateDefaultPassList(disable_passes);
     } else {
       Usage("Unknown argument %s", option.data());
     }
@@ -942,7 +949,7 @@
 
   if (android_root.empty()) {
     const char* android_root_env_var = getenv("ANDROID_ROOT");
-    if (android_root_env_var == NULL) {
+    if (android_root_env_var == nullptr) {
       Usage("--android-root unspecified and ANDROID_ROOT not set");
     }
     android_root += android_root_env_var;
@@ -959,15 +966,15 @@
     boot_image_option += boot_image_filename;
   }
 
-  if (image_classes_filename != NULL && !image) {
+  if (image_classes_filename != nullptr && !image) {
     Usage("--image-classes should only be used with --image");
   }
 
-  if (image_classes_filename != NULL && !boot_image_option.empty()) {
+  if (image_classes_filename != nullptr && !boot_image_option.empty()) {
     Usage("--image-classes should not be used with --boot-image");
   }
 
-  if (image_classes_zip_filename != NULL && image_classes_filename == NULL) {
+  if (image_classes_zip_filename != nullptr && image_classes_filename == nullptr) {
     Usage("--image-classes-zip should be used with --image-classes");
   }
 
@@ -1009,7 +1016,7 @@
     oat_unstripped += oat_filename;
   }
 
-  if (compiler_filter_string == NULL) {
+  if (compiler_filter_string == nullptr) {
     if (instruction_set == kX86_64 || instruction_set == kArm64 || instruction_set == kMips) {
       // TODO: implement/fix compilers for these architectures.
       compiler_filter_string = "interpret-only";
@@ -1057,7 +1064,7 @@
   WatchDog watch_dog(watch_dog_enabled);
 
   // Check early that the result of compilation can be written
-  UniquePtr<File> oat_file;
+  std::unique_ptr<File> oat_file;
   bool create_file = !oat_unstripped.empty();  // as opposed to using open file descriptor
   if (create_file) {
     oat_file.reset(OS::CreateEmptyFile(oat_unstripped.c_str()));
@@ -1068,7 +1075,7 @@
     oat_file.reset(new File(oat_fd, oat_location));
     oat_file->DisableAutoClose();
   }
-  if (oat_file.get() == NULL) {
+  if (oat_file.get() == nullptr) {
     PLOG(ERROR) << "Failed to create oat file: " << oat_location;
     return EXIT_FAILURE;
   }
@@ -1090,11 +1097,10 @@
     }
     runtime_options.push_back(std::make_pair("bootclasspath", &boot_class_path));
   } else {
-    runtime_options.push_back(std::make_pair(boot_image_option.c_str(),
-                                             reinterpret_cast<void*>(NULL)));
+    runtime_options.push_back(std::make_pair(boot_image_option.c_str(), nullptr));
   }
   for (size_t i = 0; i < runtime_args.size(); i++) {
-    runtime_options.push_back(std::make_pair(runtime_args[i], reinterpret_cast<void*>(NULL)));
+    runtime_options.push_back(std::make_pair(runtime_args[i], nullptr));
   }
 
   VerificationResults verification_results(&compiler_options);
@@ -1118,7 +1124,7 @@
     LOG(ERROR) << "Failed to create dex2oat";
     return EXIT_FAILURE;
   }
-  UniquePtr<Dex2Oat> dex2oat(p_dex2oat);
+  std::unique_ptr<Dex2Oat> dex2oat(p_dex2oat);
   // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
   // give it away now so that we don't starve GC.
   Thread* self = Thread::Current();
@@ -1130,21 +1136,23 @@
   WellKnownClasses::Init(self->GetJniEnv());
 
   // If --image-classes was specified, calculate the full list of classes to include in the image
-  UniquePtr<CompilerDriver::DescriptorSet> image_classes(NULL);
-  if (image_classes_filename != NULL) {
+  std::unique_ptr<CompilerDriver::DescriptorSet> image_classes(nullptr);
+  if (image_classes_filename != nullptr) {
     std::string error_msg;
-    if (image_classes_zip_filename != NULL) {
+    if (image_classes_zip_filename != nullptr) {
       image_classes.reset(dex2oat->ReadImageClassesFromZip(image_classes_zip_filename,
                                                            image_classes_filename,
                                                            &error_msg));
     } else {
       image_classes.reset(dex2oat->ReadImageClassesFromFile(image_classes_filename));
     }
-    if (image_classes.get() == NULL) {
+    if (image_classes.get() == nullptr) {
       LOG(ERROR) << "Failed to create list of image classes from '" << image_classes_filename <<
           "': " << error_msg;
       return EXIT_FAILURE;
     }
+  } else if (image) {
+    image_classes.reset(new CompilerDriver::DescriptorSet);
   }
 
   std::vector<const DexFile*> dex_files;
@@ -1154,15 +1162,15 @@
     if (dex_filenames.empty()) {
       ATRACE_BEGIN("Opening zip archive from file descriptor");
       std::string error_msg;
-      UniquePtr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(zip_fd, zip_location.c_str(),
+      std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(zip_fd, zip_location.c_str(),
                                                                &error_msg));
-      if (zip_archive.get() == NULL) {
+      if (zip_archive.get() == nullptr) {
         LOG(ERROR) << "Failed to open zip from file descriptor for '" << zip_location << "': "
             << error_msg;
         return EXIT_FAILURE;
       }
       const DexFile* dex_file = DexFile::Open(*zip_archive.get(), zip_location, &error_msg);
-      if (dex_file == NULL) {
+      if (dex_file == nullptr) {
         LOG(ERROR) << "Failed to open dex from file descriptor for zip file '" << zip_location
             << "': " << error_msg;
         return EXIT_FAILURE;
@@ -1182,7 +1190,7 @@
       for (size_t i = 0; i < dex_files.size(); ++i) {
         const DexFile* dex_file = dex_files[i];
         std::string tmp_file_name(StringPrintf("/data/local/tmp/dex2oat.%d.%zd.dex", getpid(), i));
-        UniquePtr<File> tmp_file(OS::CreateEmptyFile(tmp_file_name.c_str()));
+        std::unique_ptr<File> tmp_file(OS::CreateEmptyFile(tmp_file_name.c_str()));
         if (tmp_file.get() == nullptr) {
             PLOG(ERROR) << "Failed to open file " << tmp_file_name
                         << ". Try: adb shell chmod 777 /data/local/tmp";
@@ -1208,7 +1216,7 @@
     size_t num_methods = 0;
     for (size_t i = 0; i != dex_files.size(); ++i) {
       const DexFile* dex_file = dex_files[i];
-      CHECK(dex_file != NULL);
+      CHECK(dex_file != nullptr);
       num_methods += dex_file->NumMethodIds();
     }
     if (num_methods <= compiler_options.GetNumDexMethodsThreshold()) {
@@ -1217,7 +1225,7 @@
     }
   }
 
-  UniquePtr<const CompilerDriver> compiler(dex2oat->CreateOatFile(boot_image_option,
+  std::unique_ptr<const CompilerDriver> compiler(dex2oat->CreateOatFile(boot_image_option,
                                                                   android_root,
                                                                   is_host,
                                                                   dex_files,
@@ -1231,7 +1239,7 @@
                                                                   compiler_phases_timings,
                                                                   profile_file));
 
-  if (compiler.get() == NULL) {
+  if (compiler.get() == nullptr) {
     LOG(ERROR) << "Failed to create oat file: " << oat_location;
     return EXIT_FAILURE;
   }
@@ -1316,10 +1324,10 @@
   if (oat_unstripped != oat_stripped) {
     timings.NewSplit("dex2oat OatFile copy");
     oat_file.reset();
-     UniquePtr<File> in(OS::OpenFileForReading(oat_unstripped.c_str()));
-    UniquePtr<File> out(OS::CreateEmptyFile(oat_stripped.c_str()));
+     std::unique_ptr<File> in(OS::OpenFileForReading(oat_unstripped.c_str()));
+    std::unique_ptr<File> out(OS::CreateEmptyFile(oat_stripped.c_str()));
     size_t buffer_size = 8192;
-    UniquePtr<uint8_t> buffer(new uint8_t[buffer_size]);
+    std::unique_ptr<uint8_t> buffer(new uint8_t[buffer_size]);
     while (true) {
       int bytes_read = TEMP_FAILURE_RETRY(read(in->Fd(), buffer.get(), buffer_size));
       if (bytes_read <= 0) {
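
The copy loop above streams the oat file through a fixed-size heap buffer; a minimal standalone sketch of the same pattern, written with the array form of std::unique_ptr so that delete[] runs on destruction (descriptor handling simplified, not the dex2oat code):

    #include <cstddef>
    #include <cstdint>
    #include <memory>
    #include <unistd.h>

    // Copy everything from fd_in to fd_out through an 8 KiB buffer.
    bool CopyFd(int fd_in, int fd_out) {
      constexpr size_t kBufferSize = 8192;
      std::unique_ptr<uint8_t[]> buffer(new uint8_t[kBufferSize]);  // [] form => delete[]
      while (true) {
        ssize_t bytes_read = TEMP_FAILURE_RETRY(read(fd_in, buffer.get(), kBufferSize));
        if (bytes_read <= 0) {
          return bytes_read == 0;  // 0 means EOF, negative means a read error
        }
        if (TEMP_FAILURE_RETRY(write(fd_out, buffer.get(), bytes_read)) != bytes_read) {
          return false;  // short or failed write; partial writes are not retried in this sketch
        }
      }
    }
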
diff --git a/disassembler/Android.mk b/disassembler/Android.mk
index 17828fd..814323c 100644
--- a/disassembler/Android.mk
+++ b/disassembler/Android.mk
@@ -46,7 +46,6 @@
   ifeq ($$(art_target_or_host),host)
      LOCAL_IS_HOST_MODULE := true
   endif
-  include art/build/Android.libcxx.mk
   LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := libart-disassembler
@@ -88,6 +87,7 @@
 
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
+  include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
     LOCAL_SHARED_LIBRARIES += libcutils libvixl
     include $(BUILD_SHARED_LIBRARY)
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 412a052..7c76b3c 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -76,6 +76,13 @@
           "      Example: --boot-image=/system/framework/boot.art\n"
           "\n");
   fprintf(stderr,
+          "  --instruction-set=(arm|arm64|mips|x86|x86_64): for locating the image file based on the image location\n"
+          "      set.\n"
+          "      Example: --instruction-set=x86\n"
+          "      Default: %s\n"
+          "\n",
+          GetInstructionSetString(kRuntimeISA));
+  fprintf(stderr,
           "  --output=<file> may be used to send the output to a file.\n"
           "      Example: --output=/tmp/oatdump.txt\n"
           "\n");
@@ -207,7 +214,7 @@
       const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
       CHECK(oat_dex_file != nullptr);
       std::string error_msg;
-      UniquePtr<const DexFile> dex_file(oat_dex_file->OpenDexFile(&error_msg));
+      std::unique_ptr<const DexFile> dex_file(oat_dex_file->OpenDexFile(&error_msg));
       if (dex_file.get() == nullptr) {
         LOG(WARNING) << "Failed to open dex file '" << oat_dex_file->GetDexFileLocation()
             << "': " << error_msg;
@@ -235,7 +242,7 @@
       const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
       CHECK(oat_dex_file != NULL);
       std::string error_msg;
-      UniquePtr<const DexFile> dex_file(oat_dex_file->OpenDexFile(&error_msg));
+      std::unique_ptr<const DexFile> dex_file(oat_dex_file->OpenDexFile(&error_msg));
       if (dex_file.get() == nullptr) {
         LOG(WARNING) << "Failed to open dex file '" << oat_dex_file->GetDexFileLocation()
             << "': " << error_msg;
@@ -289,7 +296,7 @@
     // Create the verifier early.
 
     std::string error_msg;
-    UniquePtr<const DexFile> dex_file(oat_dex_file.OpenDexFile(&error_msg));
+    std::unique_ptr<const DexFile> dex_file(oat_dex_file.OpenDexFile(&error_msg));
     if (dex_file.get() == NULL) {
       os << "NOT FOUND: " << error_msg << "\n\n";
       return;
@@ -417,12 +424,13 @@
       Runtime* runtime = Runtime::Current();
       if (runtime != nullptr) {
         ScopedObjectAccess soa(Thread::Current());
-        SirtRef<mirror::DexCache> dex_cache(
-            soa.Self(), runtime->GetClassLinker()->FindDexCache(dex_file));
-        SirtRef<mirror::ClassLoader> class_loader(soa.Self(), nullptr);
+        StackHandleScope<1> hs(soa.Self());
+        Handle<mirror::DexCache> dex_cache(
+            hs.NewHandle(runtime->GetClassLinker()->FindDexCache(dex_file)));
+        NullHandle<mirror::ClassLoader> class_loader;
         verifier::MethodVerifier verifier(&dex_file, &dex_cache, &class_loader, &class_def,
                                           code_item, dex_method_idx, nullptr, method_access_flags,
-                                          true, true);
+                                          true, true, true);
         verifier.Verify();
         DumpCode(indent2_os, &verifier, oat_method, code_item);
       } else {
@@ -687,11 +695,12 @@
                     uint32_t method_access_flags) {
     if ((method_access_flags & kAccNative) == 0) {
       ScopedObjectAccess soa(Thread::Current());
-      SirtRef<mirror::DexCache> dex_cache(
-          soa.Self(), Runtime::Current()->GetClassLinker()->FindDexCache(*dex_file));
-      SirtRef<mirror::ClassLoader> class_loader(soa.Self(), nullptr);
+      StackHandleScope<2> hs(soa.Self());
+      Handle<mirror::DexCache> dex_cache(
+          hs.NewHandle(Runtime::Current()->GetClassLinker()->FindDexCache(*dex_file)));
+      auto class_loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
       verifier::MethodVerifier::VerifyMethodAndDump(os, dex_method_idx, dex_file, dex_cache,
-                                                    class_loader, &class_def, code_item, NULL,
+                                                    class_loader, &class_def, code_item, nullptr,
                                                     method_access_flags);
     }
   }
@@ -730,7 +739,7 @@
   bool dump_raw_mapping_table_;
   bool dump_raw_gc_map_;
   std::set<uintptr_t> offsets_;
-  UniquePtr<Disassembler> disassembler_;
+  std::unique_ptr<Disassembler> disassembler_;
 };
 
 class ImageDumper {
@@ -869,7 +878,7 @@
       os_ = saved_os;
     }
     os << "STATS:\n" << std::flush;
-    UniquePtr<File> file(OS::OpenFileForReading(image_filename.c_str()));
+    std::unique_ptr<File> file(OS::OpenFileForReading(image_filename.c_str()));
     if (file.get() == NULL) {
       LOG(WARNING) << "Failed to find image in " << image_filename;
     }
@@ -1145,7 +1154,7 @@
         state->stats_.ComputeOutliers(total_size, expansion, method);
       }
     }
-    state->stats_.Update(ClassHelper(obj_class).GetDescriptor(), object_bytes);
+    state->stats_.Update(obj_class->GetDescriptor().c_str(), object_bytes);
   }
 
   std::set<const void*> already_seen_;
@@ -1188,7 +1197,7 @@
     std::vector<mirror::ArtMethod*> method_outlier;
     std::vector<size_t> method_outlier_size;
     std::vector<double> method_outlier_expansion;
-    std::vector<std::pair<std::string, size_t> > oat_dex_file_sizes;
+    std::vector<std::pair<std::string, size_t>> oat_dex_file_sizes;
 
     explicit Stats()
         : oat_file_bytes(0),
@@ -1436,7 +1445,7 @@
     // threshold, we assume 2 bytes per instruction and 2 instructions per block.
     kLargeMethodDexBytes = 16000
   };
-  UniquePtr<OatDumper> oat_dumper_;
+  std::unique_ptr<OatDumper> oat_dumper_;
   std::ostream* os_;
   gc::space::ImageSpace& image_space_;
   const ImageHeader& image_header_;
@@ -1459,11 +1468,12 @@
   }
 
   const char* oat_filename = NULL;
-  const char* image_filename = NULL;
-  const char* boot_image_filename = NULL;
+  const char* image_location = NULL;
+  const char* boot_image_location = NULL;
+  InstructionSet instruction_set = kRuntimeISA;
   std::string elf_filename_prefix;
   std::ostream* os = &std::cout;
-  UniquePtr<std::ofstream> out;
+  std::unique_ptr<std::ofstream> out;
   bool dump_raw_mapping_table = false;
   bool dump_raw_gc_map = false;
 
@@ -1472,9 +1482,22 @@
     if (option.starts_with("--oat-file=")) {
       oat_filename = option.substr(strlen("--oat-file=")).data();
     } else if (option.starts_with("--image=")) {
-      image_filename = option.substr(strlen("--image=")).data();
+      image_location = option.substr(strlen("--image=")).data();
     } else if (option.starts_with("--boot-image=")) {
-      boot_image_filename = option.substr(strlen("--boot-image=")).data();
+      boot_image_location = option.substr(strlen("--boot-image=")).data();
+    } else if (option.starts_with("--instruction-set=")) {
+      StringPiece instruction_set_str = option.substr(strlen("--instruction-set=")).data();
+      if (instruction_set_str == "arm") {
+        instruction_set = kThumb2;
+      } else if (instruction_set_str == "arm64") {
+        instruction_set = kArm64;
+      } else if (instruction_set_str == "mips") {
+        instruction_set = kMips;
+      } else if (instruction_set_str == "x86") {
+        instruction_set = kX86;
+      } else if (instruction_set_str == "x86_64") {
+        instruction_set = kX86_64;
+      }
     } else if (option.starts_with("--dump:")) {
         if (option == "--dump:raw_mapping_table") {
           dump_raw_mapping_table = true;
@@ -1498,12 +1521,12 @@
     }
   }
 
-  if (image_filename == NULL && oat_filename == NULL) {
+  if (image_location == NULL && oat_filename == NULL) {
     fprintf(stderr, "Either --image or --oat must be specified\n");
     return EXIT_FAILURE;
   }
 
-  if (image_filename != NULL && oat_filename != NULL) {
+  if (image_location != NULL && oat_filename != NULL) {
     fprintf(stderr, "Either --image or --oat must be specified but not both\n");
     return EXIT_FAILURE;
   }
@@ -1531,24 +1554,27 @@
   NoopCompilerCallbacks callbacks;
   options.push_back(std::make_pair("compilercallbacks", &callbacks));
 
-  if (boot_image_filename != NULL) {
+  if (boot_image_location != NULL) {
     boot_image_option += "-Ximage:";
-    boot_image_option += boot_image_filename;
+    boot_image_option += boot_image_location;
     options.push_back(std::make_pair(boot_image_option.c_str(), reinterpret_cast<void*>(NULL)));
   }
-  if (image_filename != NULL) {
+  if (image_location != NULL) {
     image_option += "-Ximage:";
-    image_option += image_filename;
+    image_option += image_location;
     options.push_back(std::make_pair(image_option.c_str(), reinterpret_cast<void*>(NULL)));
   }
+  options.push_back(
+      std::make_pair("imageinstructionset",
+                     reinterpret_cast<const void*>(GetInstructionSetString(instruction_set))));
 
   if (!Runtime::Create(options, false)) {
     fprintf(stderr, "Failed to create runtime\n");
     return EXIT_FAILURE;
   }
-  UniquePtr<Runtime> runtime(Runtime::Current());
+  std::unique_ptr<Runtime> runtime(Runtime::Current());
   // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
-  // give it away now and then switch to a more managable ScopedObjectAccess.
+  // give it away now and then switch to a more manageable ScopedObjectAccess.
   Thread::Current()->TransitionFromRunnableToSuspended(kNative);
   ScopedObjectAccess soa(Thread::Current());
   gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -1556,7 +1582,7 @@
   CHECK(image_space != NULL);
   const ImageHeader& image_header = image_space->GetImageHeader();
   if (!image_header.IsValid()) {
-    fprintf(stderr, "Invalid image header %s\n", image_filename);
+    fprintf(stderr, "Invalid image header %s\n", image_location);
     return EXIT_FAILURE;
   }
   ImageDumper image_dumper(os, *image_space, image_header,
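
With the new flag, oatdump can be pointed at the image for a particular instruction set; a hypothetical invocation combining the options documented in the usage text above (paths are placeholders):

    oatdump --image=/system/framework/boot.art --instruction-set=arm --output=/tmp/oatdump.txt
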
diff --git a/runtime/Android.mk b/runtime/Android.mk
index bc971a9..c2507b1 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -61,6 +61,7 @@
 	gc/collector/sticky_mark_sweep.cc \
 	gc/gc_cause.cc \
 	gc/heap.cc \
+	gc/reference_processor.cc \
 	gc/reference_queue.cc \
 	gc/space/bump_pointer_space.cc \
 	gc/space/dlmalloc_space.cc \
@@ -114,6 +115,7 @@
 	native/java_lang_Thread.cc \
 	native/java_lang_Throwable.cc \
 	native/java_lang_VMClassLoader.cc \
+	native/java_lang_ref_Reference.cc \
 	native/java_lang_reflect_Array.cc \
 	native/java_lang_reflect_Constructor.cc \
 	native/java_lang_reflect_Field.cc \
@@ -224,7 +226,7 @@
 	monitor_pool.cc \
 	arch/arm64/fault_handler_arm64.cc
 
-LIBART_TARGET_SRC_FILES_x86 := \
+LIBART_SRC_FILES_x86 := \
 	arch/x86/context_x86.cc \
 	arch/x86/entrypoints_init_x86.cc \
 	arch/x86/jni_entrypoints_x86.S \
@@ -233,7 +235,10 @@
 	arch/x86/thread_x86.cc \
 	arch/x86/fault_handler_x86.cc
 
-LIBART_TARGET_SRC_FILES_x86_64 := \
+LIBART_TARGET_SRC_FILES_x86 := \
+	$(LIBART_SRC_FILES_x86)
+
+LIBART_SRC_FILES_x86_64 := \
 	arch/x86_64/context_x86_64.cc \
 	arch/x86_64/entrypoints_init_x86_64.cc \
 	arch/x86_64/jni_entrypoints_x86_64.S \
@@ -243,6 +248,8 @@
 	monitor_pool.cc \
 	arch/x86_64/fault_handler_x86_64.cc
 
+LIBART_TARGET_SRC_FILES_x86_64 := \
+	$(LIBART_SRC_FILES_x86_64) \
 
 LIBART_TARGET_SRC_FILES_mips := \
 	arch/mips/context_mips.cc \
@@ -268,31 +275,11 @@
 	runtime_linux.cc \
 	thread_linux.cc
 
-ifeq ($(HOST_ARCH),x86)
-ifneq ($(BUILD_HOST_64bit),)
-LIBART_HOST_SRC_FILES += \
-	arch/x86_64/context_x86_64.cc \
-	arch/x86_64/entrypoints_init_x86_64.cc \
-	arch/x86_64/jni_entrypoints_x86_64.S \
-	arch/x86_64/portable_entrypoints_x86_64.S \
-	arch/x86_64/quick_entrypoints_x86_64.S \
-	arch/x86_64/thread_x86_64.cc \
-	arch/x86_64/fault_handler_x86_64.cc \
-	monitor_pool.cc
-else
-LIBART_HOST_SRC_FILES += \
-	arch/x86/context_x86.cc \
-	arch/x86/entrypoints_init_x86.cc \
-	arch/x86/jni_entrypoints_x86.S \
-	arch/x86/portable_entrypoints_x86.S \
-	arch/x86/quick_entrypoints_x86.S \
-	arch/x86/fault_handler_x86.cc \
-	arch/x86/thread_x86.cc
-endif
-else # HOST_ARCH != x86
-$(error unsupported HOST_ARCH=$(HOST_ARCH))
-endif # HOST_ARCH != x86
+LIBART_HOST_SRC_FILES_32 := \
+	$(LIBART_SRC_FILES_x86)
 
+LIBART_HOST_SRC_FILES_64 := \
+	$(LIBART_SRC_FILES_x86_64)
 
 LIBART_ENUM_OPERATOR_OUT_HEADER_FILES := \
 	arch/x86_64/registers_x86_64.h \
@@ -310,6 +297,7 @@
 	lock_word.h \
 	mirror/class.h \
 	oat.h \
+	object_callbacks.h \
 	quick/inline_method_analyser.h \
 	thread.h \
 	thread_state.h \
@@ -320,6 +308,12 @@
   LIBART_CFLAGS += -DART_USE_PORTABLE_COMPILER=1
 endif
 
+ifeq ($(MALLOC_IMPL),jemalloc)
+  LIBART_CFLAGS += -DUSE_JEMALLOC
+else
+  LIBART_CFLAGS += -DUSE_DLMALLOC
+endif
+
 # $(1): target or host
 # $(2): ndebug or debug
 # $(3): true or false for LOCAL_CLANG
@@ -361,11 +355,11 @@
       LOCAL_SRC_FILES_$(arch) := $$(LIBART_TARGET_SRC_FILES_$(arch)))
   else # host
     LOCAL_SRC_FILES := $(LIBART_HOST_SRC_FILES)
+    LOCAL_SRC_FILES_32 := $(LIBART_HOST_SRC_FILES_32)
+    LOCAL_SRC_FILES_64 := $(LIBART_HOST_SRC_FILES_64)
     LOCAL_IS_HOST_MODULE := true
   endif
 
-  include art/build/Android.libcxx.mk
-
   GENERATED_SRC_DIR := $$(call local-generated-sources-dir)
   ENUM_OPERATOR_OUT_CC_FILES := $$(patsubst %.h,%_operator_out.cc,$$(LIBART_ENUM_OPERATOR_OUT_HEADER_FILES))
   ENUM_OPERATOR_OUT_GEN := $$(addprefix $$(GENERATED_SRC_DIR)/,$$(ENUM_OPERATOR_OUT_CC_FILES))
@@ -409,7 +403,8 @@
   endif
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES)
   LOCAL_SHARED_LIBRARIES += liblog libnativehelper
-  LOCAL_SHARED_LIBRARIES += libbacktrace # native stack trace support
+  include external/libcxx/libcxx.mk
+  LOCAL_SHARED_LIBRARIES += libbacktrace_libc++
   ifeq ($$(art_target_or_host),target)
     LOCAL_SHARED_LIBRARIES += libcutils libdl libselinux libutils
     LOCAL_STATIC_LIBRARIES := libziparchive libz
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index c285088..45ff21f 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -17,7 +17,8 @@
 #include <stdint.h>
 
 #include "common_runtime_test.h"
-#include "mirror/art_method.h"
+#include "mirror/art_method-inl.h"
+#include "quick/quick_method_frame_info.h"
 
 namespace art {
 
@@ -30,10 +31,13 @@
     Thread* t = Thread::Current();
     t->TransitionFromSuspendedToRunnable();  // So we can create callee-save methods.
 
-    mirror::ArtMethod* save_method = r->CreateCalleeSaveMethod(isa, type);
-    EXPECT_EQ(save_method->GetFrameSizeInBytes(), save_size) << "Expected and real size differs for "
-        << type << " core spills=" << std::hex << save_method->GetCoreSpillMask() << " fp spills="
-        << save_method->GetFpSpillMask() << std::dec;
+    r->SetInstructionSet(isa);
+    mirror::ArtMethod* save_method = r->CreateCalleeSaveMethod(type);
+    r->SetCalleeSaveMethod(save_method, type);
+    QuickMethodFrameInfo frame_info = save_method->GetQuickFrameInfo();
+    EXPECT_EQ(frame_info.FrameSizeInBytes(), save_size) << "Expected and real size differs for "
+        << type << " core spills=" << std::hex << frame_info.CoreSpillMask() << " fp spills="
+        << frame_info.FpSpillMask() << std::dec;
 
     t->TransitionFromRunnableToSuspended(ThreadState::kNative);  // So we can shut down.
   }
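
The test above now reads the frame layout through a QuickMethodFrameInfo value instead of
per-method getters. As a rough illustration only (the real quick/quick_method_frame_info.h
may differ in detail), such a value type is just a constexpr triple of the frame size and
the two spill masks:

    #include <stdint.h>

    // Illustrative sketch, not part of this patch: a constexpr value type
    // exposing the three accessors the test above relies on.
    class QuickMethodFrameInfo {
     public:
      constexpr QuickMethodFrameInfo(uint32_t frame_size_in_bytes,
                                     uint32_t core_spill_mask,
                                     uint32_t fp_spill_mask)
          : frame_size_in_bytes_(frame_size_in_bytes),
            core_spill_mask_(core_spill_mask),
            fp_spill_mask_(fp_spill_mask) {}

      constexpr uint32_t FrameSizeInBytes() const { return frame_size_in_bytes_; }
      constexpr uint32_t CoreSpillMask() const { return core_spill_mask_; }
      constexpr uint32_t FpSpillMask() const { return fp_spill_mask_; }

     private:
      uint32_t frame_size_in_bytes_;
      uint32_t core_spill_mask_;
      uint32_t fp_spill_mask_;
    };
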
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 0e1b25e..6a337b3 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -16,8 +16,9 @@
 
 #include "context_arm.h"
 
-#include "mirror/art_method.h"
+#include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
+#include "quick/quick_method_frame_info.h"
 #include "stack.h"
 #include "thread.h"
 
@@ -42,17 +43,15 @@
 
 void ArmContext::FillCalleeSaves(const StackVisitor& fr) {
   mirror::ArtMethod* method = fr.GetMethod();
-  uint32_t core_spills = method->GetCoreSpillMask();
-  uint32_t fp_core_spills = method->GetFpSpillMask();
-  size_t spill_count = POPCOUNT(core_spills);
-  size_t fp_spill_count = POPCOUNT(fp_core_spills);
-  size_t frame_size = method->GetFrameSizeInBytes();
+  const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
+  size_t spill_count = POPCOUNT(frame_info.CoreSpillMask());
+  size_t fp_spill_count = POPCOUNT(frame_info.FpSpillMask());
   if (spill_count > 0) {
     // Lowest number spill is farthest away, walk registers and fill into context
     int j = 1;
     for (size_t i = 0; i < kNumberOfCoreRegisters; i++) {
-      if (((core_spills >> i) & 1) != 0) {
-        gprs_[i] = fr.CalleeSaveAddress(spill_count - j, frame_size);
+      if (((frame_info.CoreSpillMask() >> i) & 1) != 0) {
+        gprs_[i] = fr.CalleeSaveAddress(spill_count - j, frame_info.FrameSizeInBytes());
         j++;
       }
     }
@@ -61,8 +60,9 @@
     // Lowest number spill is farthest away, walk registers and fill into context
     int j = 1;
     for (size_t i = 0; i < kNumberOfSRegisters; i++) {
-      if (((fp_core_spills >> i) & 1) != 0) {
-        fprs_[i] = fr.CalleeSaveAddress(spill_count + fp_spill_count - j, frame_size);
+      if (((frame_info.FpSpillMask() >> i) & 1) != 0) {
+        fprs_[i] = fr.CalleeSaveAddress(spill_count + fp_spill_count - j,
+                                        frame_info.FrameSizeInBytes());
         j++;
       }
     }
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index eddaa0b..f81e2f9 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -34,7 +34,7 @@
 namespace art {
 
 extern "C" void art_quick_throw_null_pointer_exception();
-extern "C" void art_quick_throw_stack_overflow(void*);
+extern "C" void art_quick_throw_stack_overflow_from_signal();
 extern "C" void art_quick_implicit_suspend();
 
 // Get the size of a thumb2 instruction in bytes.
@@ -50,7 +50,7 @@
   struct ucontext *uc = (struct ucontext *)context;
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   *out_sp = static_cast<uintptr_t>(sc->arm_sp);
-  LOG(DEBUG) << "sp: " << *out_sp;
+  VLOG(signals) << "sp: " << *out_sp;
   if (*out_sp == 0) {
     return;
   }
@@ -74,7 +74,7 @@
 
   // Need to work out the size of the instruction that caused the exception.
   uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
-  LOG(DEBUG) << "pc: " << std::hex << static_cast<void*>(ptr);
+  VLOG(signals) << "pc: " << std::hex << static_cast<void*>(ptr);
   uint32_t instr_size = GetInstructionSize(ptr);
 
   *out_return_pc = (sc->arm_pc + instr_size) | 1;
@@ -95,7 +95,7 @@
   uint32_t instr_size = GetInstructionSize(ptr);
   sc->arm_lr = (sc->arm_pc + instr_size) | 1;      // LR needs to point to gc map location
   sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
-  LOG(DEBUG) << "Generating null pointer exception";
+  VLOG(signals) << "Generating null pointer exception";
   return true;
 }
 
@@ -117,10 +117,10 @@
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   uint8_t* ptr2 = reinterpret_cast<uint8_t*>(sc->arm_pc);
   uint8_t* ptr1 = ptr2 - 4;
-  LOG(DEBUG) << "checking suspend";
+  VLOG(signals) << "checking suspend";
 
   uint16_t inst2 = ptr2[0] | ptr2[1] << 8;
-  LOG(DEBUG) << "inst2: " << std::hex << inst2 << " checkinst2: " << checkinst2;
+  VLOG(signals) << "inst2: " << std::hex << inst2 << " checkinst2: " << checkinst2;
   if (inst2 != checkinst2) {
     // Second instruction is not good, not ours.
     return false;
@@ -132,7 +132,7 @@
   bool found = false;
   while (ptr1 > limit) {
     uint32_t inst1 = ((ptr1[0] | ptr1[1] << 8) << 16) | (ptr1[2] | ptr1[3] << 8);
-    LOG(DEBUG) << "inst1: " << std::hex << inst1 << " checkinst1: " << checkinst1;
+    VLOG(signals) << "inst1: " << std::hex << inst1 << " checkinst1: " << checkinst1;
     if (inst1 == checkinst1) {
       found = true;
       break;
@@ -140,7 +140,7 @@
     ptr1 -= 2;      // Min instruction size is 2 bytes.
   }
   if (found) {
-    LOG(DEBUG) << "suspend check match";
+    VLOG(signals) << "suspend check match";
     // This is a suspend check.  Arrange for the signal handler to return to
     // art_quick_implicit_suspend.  Also set LR so that after the suspend check it
     // will resume the instruction (current PC + 2).  PC points to the
@@ -148,14 +148,14 @@
 
     // NB: remember that we need to set the bottom bit of the LR register
     // to switch to thumb mode.
-    LOG(DEBUG) << "arm lr: " << std::hex << sc->arm_lr;
-    LOG(DEBUG) << "arm pc: " << std::hex << sc->arm_pc;
+    VLOG(signals) << "arm lr: " << std::hex << sc->arm_lr;
+    VLOG(signals) << "arm pc: " << std::hex << sc->arm_pc;
     sc->arm_lr = sc->arm_pc + 3;      // +2 + 1 (for thumb)
     sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_implicit_suspend);
 
     // Now remove the suspend trigger that caused this fault.
     Thread::Current()->RemoveSuspendTrigger();
-    LOG(DEBUG) << "removed suspend trigger invoking test suspend";
+    VLOG(signals) << "removed suspend trigger invoking test suspend";
     return true;
   }
   return false;
@@ -174,103 +174,60 @@
 // on the stack.
 //
 // If we determine this is a stack overflow we need to move the stack pointer
-// to the overflow region below the protected region.  Because we now have
-// a gap in the stack (skips over protected region), we need to arrange
-// for the rest of the system to be unaware of the new stack arrangement
-// and behave as if there is a fully valid stack.  We do this by placing
-// a unique address onto the stack followed by
-// the size of the gap.  The stack walker will detect this and skip over the
-// gap.
-
-// NB. We also need to be careful of stack alignment as the ARM EABI specifies that
-// stack must be 8 byte aligned when making any calls.
-
-// NB. The size of the gap is the difference between the previous frame's SP and
-// the SP at which the size word is pushed.
+// to the overflow region below the protected region.
 
 bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
   struct ucontext *uc = (struct ucontext *)context;
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  LOG(DEBUG) << "stack overflow handler with sp at " << std::hex << &uc;
-  LOG(DEBUG) << "sigcontext: " << std::hex << sc;
+  VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
+  VLOG(signals) << "sigcontext: " << std::hex << sc;
 
-  uint8_t* sp = reinterpret_cast<uint8_t*>(sc->arm_sp);
-  LOG(DEBUG) << "sp: " << static_cast<void*>(sp);
+  uintptr_t sp = sc->arm_sp;
+  VLOG(signals) << "sp: " << std::hex << sp;
 
-  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
-  LOG(DEBUG) << "fault_addr: " << std::hex << fault_addr;
-  LOG(DEBUG) << "checking for stack overflow, sp: " << std::hex << static_cast<void*>(sp) <<
+  uintptr_t fault_addr = sc->fault_address;
+  VLOG(signals) << "fault_addr: " << std::hex << fault_addr;
+  VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
     ", fault_addr: " << fault_addr;
-  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(sp - Thread::kStackOverflowReservedBytes);
+
+  uintptr_t overflow_addr = sp - Thread::kStackOverflowReservedBytes;
+
+  Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
+  CHECK_EQ(self, Thread::Current());
+  uintptr_t pregion = reinterpret_cast<uintptr_t>(self->GetStackEnd()) -
+      Thread::kStackOverflowProtectedSize;
 
   // Check that the fault address is the value expected for a stack overflow.
   if (fault_addr != overflow_addr) {
-    LOG(DEBUG) << "Not a stack overflow";
+    VLOG(signals) << "Not a stack overflow";
     return false;
   }
 
   // We know this is a stack overflow.  We need to move the sp to the overflow region
-  // the exists below the protected region.  R9 contains the current Thread* so
-  // we can read the stack_end from that and subtract the size of the
-  // protected region.  This creates a gap in the stack that needs to be marked.
-  Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
+  // that exists below the protected region.  Determine the address of the next
+  // available valid address below the protected region.
+  uintptr_t prevsp = sp;
+  sp = pregion;
+  VLOG(signals) << "setting sp to overflow region at " << std::hex << sp;
 
-  uint8_t* prevsp = sp;
-  sp = self->GetStackEnd() - Thread::kStackOverflowProtectedSize;
-  LOG(DEBUG) << "setting sp to overflow region at " << std::hex << static_cast<void*>(sp);
-
-  // We need to find the previous frame.  Remember that
-  // this has not yet been fully constructed because the SP has not been
-  // decremented.  So we need to work out the size of the spill portion of the
-  // frame.  This consists of something like:
-  //
-  // 0xb6a1d49c: e92d40e0  push    {r5, r6, r7, lr}
-  // 0xb6a1d4a0: ed2d8a06  vpush.f32 {s16-s21}
-  //
-  // The first is encoded in the ArtMethod as the spill_mask, the second as the
-  // fp_spill_mask.  A population count on each will give the number of registers
-  // in each mask.  Each register is 4 bytes on ARM32.
-
-  mirror::ArtMethod* method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0);
-  uint32_t spill_mask = method->GetCoreSpillMask();
-  uint32_t numcores = POPCOUNT(spill_mask);
-  uint32_t fp_spill_mask = method->GetFpSpillMask();
-  uint32_t numfps = POPCOUNT(fp_spill_mask);
-  uint32_t spill_size = (numcores + numfps) * 4;
-  LOG(DEBUG) << "spill size: " << spill_size;
-  uint8_t* prevframe = prevsp + spill_size;
-  LOG(DEBUG) << "previous frame: " << static_cast<void*>(prevframe);
-
-  // NOTE: the ARM EABI needs an 8 byte alignment.  In the case of ARM32 a pointer
-  // is 4 bytes so that, together with the offset to the previous frame is 8
-  // bytes.  On other architectures we will need to align the stack.
-
-  // Push a marker onto the stack to tell the stack walker that there is a stack
-  // overflow and the stack is not contiguous.
-
-  // First the offset from SP to the previous frame.
-  sp -= sizeof(uint32_t);
-  LOG(DEBUG) << "push gap of " << static_cast<uint32_t>(prevframe - sp);
-  *reinterpret_cast<uint32_t*>(sp) = static_cast<uint32_t>(prevframe - sp);
-
-  // Now the gap marker (pointer sized).
-  sp -= sizeof(mirror::ArtMethod*);
-  *reinterpret_cast<void**>(sp) = stack_overflow_gap_marker;
+  // Since the compiler puts the implicit overflow
+  // check before the callee save instructions, the SP is already pointing to
+  // the previous frame.
+  VLOG(signals) << "previous frame: " << std::hex << prevsp;
 
   // Now establish the stack pointer for the signal return.
-  sc->arm_sp = reinterpret_cast<uintptr_t>(sp);
+  sc->arm_sp = prevsp;
 
-  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
-  // We need the LR to point to the GC map just after the fault instruction.
-  uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
-  uint32_t instr_size = GetInstructionSize(ptr);
-  sc->arm_lr = (sc->arm_pc + instr_size) | 1;      // LR needs to point to gc map location
-  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
+  // Tell the stack overflow code where the new stack pointer should be.
+  sc->arm_ip = sp;      // aka r12
 
-  // The kernel will now return to the address in sc->arm_pc.  We have arranged the
-  // stack pointer to be in the overflow region.  Throwing the exception will perform
-  // a longjmp which will restore the stack pointer to the correct location for the
-  // exception catch.
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow_from_signal.
+  // The value of LR must be the same as it was when we entered the code that
+  // caused this fault.  This will be inserted into a callee save frame by
+  // the function to which this handler returns (art_quick_throw_stack_overflow_from_signal).
+  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow_from_signal);
+
+  // The kernel will now return to the address in sc->arm_pc.
   return true;
 }
 }       // namespace art
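
For readers following the handler rewrite above: the overflow test reduces to checking whether
the faulting address is exactly the probe address one reserved region below the current SP.
A stand-alone sketch of that predicate (the constant is a placeholder; the runtime uses its
own per-architecture value):

    #include <stdint.h>

    // Placeholder value for illustration; not the runtime's actual constant.
    constexpr uintptr_t kStackOverflowReservedBytes = 16 * 1024;

    // True when a fault at fault_addr looks like the implicit stack overflow
    // probe issued against the current stack pointer sp.
    inline bool IsImplicitStackOverflow(uintptr_t sp, uintptr_t fault_addr) {
      return fault_addr == sp - kStackOverflowReservedBytes;
    }
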
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index bc80644..5212576 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -235,6 +235,31 @@
      */
 ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode
 
+  /*
+   * Invoke stack overflow exception from signal handler.
+   * On entry:
+   * r9: thread
+   * sp: address of last known frame
+   * r12: address of next valid SP below protected region in stack
+   *
+   * This is deceptively simple but hides some complexity.  It is called in the case of
+   * a stack overflow condition during implicit checks.  The signal handler has been
+   * called by the kernel due to a load from the protected stack region.  The handler
+   * works out the address of the previous frame and passes this in SP.  However there
+   * is a piece of memory somewhere below the current SP that is not accessible (the
+   * memory that caused the signal).  The signal handler works out the next
+   * accessible value of SP and passes this in r12.  This code then sets up the SP
+   * to be this new value and calls the code to create and throw the stack overflow
+   * exception.
+   */
+ENTRY art_quick_throw_stack_overflow_from_signal
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov r0, r9                      @ pass Thread::Current
+    mov r1, sp                      @ pass SP
+    mov sp, r12                     @ move SP down to below protected region.
+    b   artThrowStackOverflowFromCode                   @ artThrowStackOverflowFromCode(Thread*, SP)
+END art_quick_throw_stack_overflow_from_signal
+
     /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
      * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
@@ -374,7 +399,7 @@
     @ unlocked case - r2 holds thread id with count of 0
     strex  r3, r2, [r0, #LOCK_WORD_OFFSET]
     cbnz   r3, .Lstrex_fail           @ store failed, retry
-    dmb    ish                        @ full (LoadLoad) memory barrier
+    dmb    ish                        @ full (LoadLoad|LoadStore) memory barrier
     bx lr
 .Lstrex_fail:
     b .Lretry_lock                    @ unlikely forward branch, need to reload and recheck r1/r2
@@ -417,8 +442,8 @@
     cmp    r1, #65536
     bpl    .Lrecursive_thin_unlock
     @ transition to unlocked, r3 holds 0
+    dmb    ish                        @ full (LoadStore|StoreStore) memory barrier
     str    r3, [r0, #LOCK_WORD_OFFSET]
-    dmb    ish                        @ full (StoreLoad) memory barrier
     bx     lr
 .Lrecursive_thin_unlock:
     sub    r1, r1, #65536
diff --git a/runtime/arch/arm/quick_method_frame_info_arm.h b/runtime/arch/arm/quick_method_frame_info_arm.h
new file mode 100644
index 0000000..8d08190
--- /dev/null
+++ b/runtime/arch/arm/quick_method_frame_info_arm.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_ARM_QUICK_METHOD_FRAME_INFO_ARM_H_
+#define ART_RUNTIME_ARCH_ARM_QUICK_METHOD_FRAME_INFO_ARM_H_
+
+#include "quick/quick_method_frame_info.h"
+#include "registers_arm.h"
+#include "runtime.h"  // for Runtime::CalleeSaveType.
+
+namespace art {
+namespace arm {
+
+static constexpr uint32_t kArmCalleeSaveRefSpills =
+    (1 << art::arm::R5) | (1 << art::arm::R6)  | (1 << art::arm::R7) | (1 << art::arm::R8) |
+    (1 << art::arm::R10) | (1 << art::arm::R11);
+static constexpr uint32_t kArmCalleeSaveArgSpills =
+    (1 << art::arm::R1) | (1 << art::arm::R2) | (1 << art::arm::R3);
+static constexpr uint32_t kArmCalleeSaveAllSpills =
+    (1 << art::arm::R4) | (1 << art::arm::R9);
+static constexpr uint32_t kArmCalleeSaveFpAllSpills =
+    (1 << art::arm::S0)  | (1 << art::arm::S1)  | (1 << art::arm::S2)  | (1 << art::arm::S3)  |
+    (1 << art::arm::S4)  | (1 << art::arm::S5)  | (1 << art::arm::S6)  | (1 << art::arm::S7)  |
+    (1 << art::arm::S8)  | (1 << art::arm::S9)  | (1 << art::arm::S10) | (1 << art::arm::S11) |
+    (1 << art::arm::S12) | (1 << art::arm::S13) | (1 << art::arm::S14) | (1 << art::arm::S15) |
+    (1 << art::arm::S16) | (1 << art::arm::S17) | (1 << art::arm::S18) | (1 << art::arm::S19) |
+    (1 << art::arm::S20) | (1 << art::arm::S21) | (1 << art::arm::S22) | (1 << art::arm::S23) |
+    (1 << art::arm::S24) | (1 << art::arm::S25) | (1 << art::arm::S26) | (1 << art::arm::S27) |
+    (1 << art::arm::S28) | (1 << art::arm::S29) | (1 << art::arm::S30) | (1 << art::arm::S31);
+
+constexpr uint32_t ArmCalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
+  return kArmCalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? kArmCalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveAll ? kArmCalleeSaveAllSpills : 0) | (1 << art::arm::LR);
+}
+
+constexpr uint32_t ArmCalleeSaveFpSpills(Runtime::CalleeSaveType type) {
+  return type == Runtime::kSaveAll ? kArmCalleeSaveFpAllSpills : 0;
+}
+
+constexpr uint32_t ArmCalleeSaveFrameSize(Runtime::CalleeSaveType type) {
+  return RoundUp((POPCOUNT(ArmCalleeSaveCoreSpills(type)) /* gprs */ +
+                  POPCOUNT(ArmCalleeSaveFpSpills(type)) /* fprs */ +
+                  1 /* Method* */) * kArmPointerSize, kStackAlignment);
+}
+
+constexpr QuickMethodFrameInfo ArmCalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
+  return QuickMethodFrameInfo(ArmCalleeSaveFrameSize(type),
+                              ArmCalleeSaveCoreSpills(type),
+                              ArmCalleeSaveFpSpills(type));
+}
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_RUNTIME_ARCH_ARM_QUICK_METHOD_FRAME_INFO_ARM_H_
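
Since everything in this new header is constexpr, callers can sanity-check the layout at
compile time. A usage sketch only (the runtime's own checks may look different; kStackAlignment
and LR come from the included headers):

    #include "arch/arm/quick_method_frame_info_arm.h"

    namespace art {
    namespace arm {

    // Compile-time checks; these generate no code.
    static_assert(ArmCalleeSaveFrameSize(Runtime::kSaveAll) % kStackAlignment == 0,
                  "SaveAll frame must be stack aligned");
    static_assert((ArmCalleeSaveCoreSpills(Runtime::kRefsAndArgs) & (1 << LR)) != 0,
                  "LR is always part of the core spill mask");

    }  // namespace arm
    }  // namespace art
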
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index 0890fa9..09e8b59 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -18,8 +18,9 @@
 
 #include "context_arm64.h"
 
-#include "mirror/art_method.h"
+#include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
+#include "quick/quick_method_frame_info.h"
 #include "stack.h"
 #include "thread.h"
 
@@ -45,18 +46,15 @@
 
 void Arm64Context::FillCalleeSaves(const StackVisitor& fr) {
   mirror::ArtMethod* method = fr.GetMethod();
-  uint32_t core_spills = method->GetCoreSpillMask();
-  uint32_t fp_core_spills = method->GetFpSpillMask();
-  size_t spill_count = POPCOUNT(core_spills);
-  size_t fp_spill_count = POPCOUNT(fp_core_spills);
-  size_t frame_size = method->GetFrameSizeInBytes();
-
+  const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
+  size_t spill_count = POPCOUNT(frame_info.CoreSpillMask());
+  size_t fp_spill_count = POPCOUNT(frame_info.FpSpillMask());
   if (spill_count > 0) {
     // Lowest number spill is farthest away, walk registers and fill into context.
     int j = 1;
     for (size_t i = 0; i < kNumberOfCoreRegisters; i++) {
-      if (((core_spills >> i) & 1) != 0) {
-        gprs_[i] = fr.CalleeSaveAddress(spill_count  - j, frame_size);
+      if (((frame_info.CoreSpillMask() >> i) & 1) != 0) {
+        gprs_[i] = fr.CalleeSaveAddress(spill_count  - j, frame_info.FrameSizeInBytes());
         j++;
       }
     }
@@ -66,8 +64,9 @@
     // Lowest number spill is farthest away, walk registers and fill into context.
     int j = 1;
     for (size_t i = 0; i < kNumberOfDRegisters; i++) {
-      if (((fp_core_spills >> i) & 1) != 0) {
-        fprs_[i] = fr.CalleeSaveAddress(spill_count + fp_spill_count - j, frame_size);
+      if (((frame_info.FpSpillMask() >> i) & 1) != 0) {
+        fprs_[i] = fr.CalleeSaveAddress(spill_count + fp_spill_count - j,
+                                        frame_info.FrameSizeInBytes());
         j++;
       }
     }
@@ -100,14 +99,33 @@
   gprs_[X14] = NULL;
   gprs_[X15] = NULL;
 
-  fprs_[D8] = NULL;
-  fprs_[D9] = NULL;
-  fprs_[D10] = NULL;
-  fprs_[D11] = NULL;
-  fprs_[D12] = NULL;
-  fprs_[D13] = NULL;
-  fprs_[D14] = NULL;
-  fprs_[D15] = NULL;
+  // d0-d7, d16-d31 are caller-saved; d8-d15 are callee-saved.
+
+  fprs_[D0] = NULL;
+  fprs_[D1] = NULL;
+  fprs_[D2] = NULL;
+  fprs_[D3] = NULL;
+  fprs_[D4] = NULL;
+  fprs_[D5] = NULL;
+  fprs_[D6] = NULL;
+  fprs_[D7] = NULL;
+
+  fprs_[D16] = NULL;
+  fprs_[D17] = NULL;
+  fprs_[D18] = NULL;
+  fprs_[D19] = NULL;
+  fprs_[D20] = NULL;
+  fprs_[D21] = NULL;
+  fprs_[D22] = NULL;
+  fprs_[D23] = NULL;
+  fprs_[D24] = NULL;
+  fprs_[D25] = NULL;
+  fprs_[D26] = NULL;
+  fprs_[D27] = NULL;
+  fprs_[D28] = NULL;
+  fprs_[D29] = NULL;
+  fprs_[D30] = NULL;
+  fprs_[D31] = NULL;
 }
 
 extern "C" void art_quick_do_long_jump(uint64_t*, uint64_t*);
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 2a5c7d1..cb9f53b 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -84,12 +84,6 @@
 // Double-precision FP arithmetics.
 extern "C" double fmod(double a, double b);         // REM_DOUBLE[_2ADDR]
 
-// Long long arithmetics - REM_LONG[_2ADDR] and DIV_LONG[_2ADDR]
-extern "C" int64_t art_quick_mul_long(int64_t, int64_t);
-extern "C" uint64_t art_quick_shl_long(uint64_t, uint32_t);
-extern "C" uint64_t art_quick_shr_long(uint64_t, uint32_t);
-extern "C" uint64_t art_quick_ushr_long(uint64_t, uint32_t);
-
 // Intrinsic entrypoints.
 extern "C" int32_t __memcmp16(void*, void*, int32_t);
 extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
@@ -199,10 +193,10 @@
   qpoints->pF2l = NULL;
   qpoints->pLdiv = NULL;
   qpoints->pLmod = NULL;
-  qpoints->pLmul = art_quick_mul_long;
-  qpoints->pShlLong = art_quick_shl_long;
-  qpoints->pShrLong = art_quick_shr_long;
-  qpoints->pUshrLong = art_quick_ushr_long;
+  qpoints->pLmul = NULL;
+  qpoints->pShlLong = NULL;
+  qpoints->pShrLong = NULL;
+  qpoints->pUshrLong = NULL;
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 85a2c9e..7f31fb6 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -42,7 +42,7 @@
 #endif
 
     // FP args
-    stp d1, d2,   [sp, #8]
+    stp d0, d1, [sp, #8]
     stp d2, d3, [sp, #24]
     stp d4, d5, [sp, #40]
     stp d6, d7, [sp, #56]
@@ -419,43 +419,56 @@
     brk 0  // Unreached
 .endm
 
-.macro RETURN_OR_DELIVER_PENDING_EXCEPTION
-    ldr x9, [xSELF, # THREAD_EXCEPTION_OFFSET]   // Get exception field.
-    cbnz x9, 1f
+.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
+    ldr \reg, [xSELF, # THREAD_EXCEPTION_OFFSET]   // Get exception field.
+    cbnz \reg, 1f
     ret
 1:
     DELIVER_PENDING_EXCEPTION
 .endm
 
-// FIXME: Temporary fix for TR(XSELF).
+.macro RETURN_OR_DELIVER_PENDING_EXCEPTION
+    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG x9
+.endm
+
+// Same as above with x1. This is helpful in stubs that want to avoid clobbering another register.
+.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG x1
+.endm
+
+.macro RETURN_IF_W0_IS_ZERO_OR_DELIVER
+    cbnz w0, 1f                // result non-zero branch over
+    ret                        // return
+1:
+    DELIVER_PENDING_EXCEPTION
+.endm
+
 .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
-    mov x0, x19                        // pass Thread::Current
+    mov x0, xSELF                        // pass Thread::Current
     mov x1, sp                        // pass SP
     b   \cxx_name                     // \cxx_name(Thread*, SP)
 END \c_name
 .endm
 
-// FIXME: Temporary fix for TR(XSELF).
 .macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context.
-    mov x1, x19                       // pass Thread::Current.
+    mov x1, xSELF                       // pass Thread::Current.
     mov x2, sp                        // pass SP.
     b   \cxx_name                     // \cxx_name(arg, Thread*, SP).
     brk 0
 END \c_name
 .endm
 
-// FIXME: Temporary fix for TR(XSELF).
 .macro TWO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
-    mov x2, x19                       // pass Thread::Current
+    mov x2, xSELF                       // pass Thread::Current
     mov x3, sp                        // pass SP
     b   \cxx_name                     // \cxx_name(arg1, arg2, Thread*, SP)
     brk 0
@@ -495,26 +508,42 @@
 ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode
 
     /*
-     * TODO arm64 specifics need to be fleshed out.
      * All generated callsites for interface invokes and invocation slow paths will load arguments
-     * as usual - except instead of loading x0 with the target Method*, x0 will contain
-     * the method_idx.  This wrapper will save x1-x3, load the caller's Method*, align the
+     * as usual - except instead of loading arg0/x0 with the target Method*, arg0/x0 will contain
+     * the method_idx.  This wrapper will save arg1-arg3, load the caller's Method*, align the
      * stack and call the appropriate C helper.
-     * NOTE: "this" is first visible argument of the target, and so can be found in x1.
+     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/x1.
      *
-     * The helper will attempt to locate the target and return a result in x0 consisting
+     * The helper will attempt to locate the target and return a 128-bit result in x0/x1 consisting
      * of the target Method* in x0 and method->code_ in x1.
      *
-     * If unsuccessful, the helper will return NULL/NULL. There will be a pending exception in the
+     * If unsuccessful, the helper will return NULL/????. There will be a pending exception in the
      * thread and we branch to another stub to deliver it.
      *
      * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
      * pointing back to the original caller.
+     *
+     * Adapted from ARM32 code.
+     *
+     * Clobbers x12.
      */
 .macro INVOKE_TRAMPOLINE c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
-    brk 0
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
+    // Helper signature is always
+    // (method_idx, *this_object, *caller_method, *self, sp)
+
+    ldr    x2, [sp, #FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE]  // pass caller Method*
+    mov    x3, xSELF                      // pass Thread::Current
+    mov    x4, sp
+    bl     \cxx_name                      // (method_idx, this, caller, Thread*, SP)
+    mov    x12, x1                         // save Method*->code_
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    cbz    x0, 1f                         // did we find the target? if not go to exception delivery
+    br     x12                             // tail call to target
+1:
+    DELIVER_PENDING_EXCEPTION
 END \c_name
 .endm
 
@@ -975,7 +1004,6 @@
      * failure.
      */
     .extern artHandleFillArrayDataFromCode
-// TODO: xSELF -> x19.
 ENTRY art_quick_handle_fill_data
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // Save callee saves in case exception allocation triggers GC.
     mov    x2, xSELF                       // Pass Thread::Current.
@@ -986,8 +1014,81 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_handle_fill_data
 
-UNIMPLEMENTED art_quick_lock_object
-UNIMPLEMENTED art_quick_unlock_object
+    /*
+     * Entry from managed code that calls artLockObjectFromCode, may block for GC. x0 holds the
+     * possibly null object to lock.
+     *
+     * Derived from arm32 code.
+     */
+    .extern artLockObjectFromCode
+ENTRY art_quick_lock_object
+    cbz    w0, .Lslow_lock
+    add    x4, x0, #LOCK_WORD_OFFSET  // exclusive load/store has no immediate offset form
+.Lretry_lock:
+    ldr    w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop?
+    ldxr   w1, [x4]
+    cbnz   w1, .Lnot_unlocked         // already thin locked
+    stxr   w3, w2, [x4]
+    cbnz   w3, .Lstrex_fail           // store failed, retry
+    dmb    ishld                      // full (LoadLoad|LoadStore) memory barrier
+    ret
+.Lstrex_fail:
+    b .Lretry_lock                    // unlikely forward branch, need to reload and recheck w1/w2
+.Lnot_unlocked:
+    lsr    w3, w1, 30
+    cbnz   w3, .Lslow_lock            // if either of the top two bits are set, go slow path
+    eor    w2, w1, w2                 // lock_word.ThreadId() ^ self->ThreadId()
+    uxth   w2, w2                     // zero top 16 bits
+    cbnz   w2, .Lslow_lock            // lock word and self thread id's match -> recursive lock
+                                      // else contention, go to slow path
+    add    w2, w1, #65536             // increment count in lock word placing in w2 for storing
+    lsr    w1, w2, 30                 // if either of the top two bits are set, we overflowed.
+    cbnz   w1, .Lslow_lock            // if we overflow the count go slow path
+    str    w2, [x0, #LOCK_WORD_OFFSET]// no need for stxr as we hold the lock
+    ret
+.Lslow_lock:
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case we block
+    mov    x1, xSELF                  // pass Thread::Current
+    mov    x2, sp                     // pass SP
+    bl     artLockObjectFromCode      // (Object* obj, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_W0_IS_ZERO_OR_DELIVER
+END art_quick_lock_object
+
+    /*
+     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
+     * x0 holds the possibly null object to lock.
+     *
+     * Derived from arm32 code.
+     */
+    .extern artUnlockObjectFromCode
+ENTRY art_quick_unlock_object
+    cbz    x0, .Lslow_unlock
+    ldr    w1, [x0, #LOCK_WORD_OFFSET]
+    lsr    w2, w1, 30
+    cbnz   w2, .Lslow_unlock          // if either of the top two bits are set, go slow path
+    ldr    w2, [xSELF, #THREAD_ID_OFFSET]
+    eor    w3, w1, w2                 // lock_word.ThreadId() ^ self->ThreadId()
+    uxth   w3, w3                     // zero top 16 bits
+    cbnz   w3, .Lslow_unlock          // lock word and self thread id don't match -> slow path
+    cmp    w1, #65536
+    bpl    .Lrecursive_thin_unlock
+    // transition to unlocked, w3 holds 0
+    dmb    ish                        // full (LoadStore|StoreStore) memory barrier
+    str    w3, [x0, #LOCK_WORD_OFFSET]
+    ret
+.Lrecursive_thin_unlock:
+    sub    w1, w1, #65536
+    str    w1, [x0, #LOCK_WORD_OFFSET]
+    ret
+.Lslow_unlock:
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case exception allocation triggers GC
+    mov    x1, xSELF                  // pass Thread::Current
+    mov    x2, sp                     // pass SP
+    bl     artUnlockObjectFromCode    // (Object* obj, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_W0_IS_ZERO_OR_DELIVER
+END art_quick_unlock_object
 
     /*
      * Entry from managed code that calls artIsAssignableFromCode and on failure calls
@@ -1150,25 +1251,7 @@
     brk 0                         // Unreached.
 END art_quick_aput_obj
 
-UNIMPLEMENTED art_quick_initialize_static_storage
-UNIMPLEMENTED art_quick_initialize_type
-UNIMPLEMENTED art_quick_initialize_type_and_verify_access
-UNIMPLEMENTED art_quick_get32_static
-UNIMPLEMENTED art_quick_get64_static
-UNIMPLEMENTED art_quick_get_obj_static
-UNIMPLEMENTED art_quick_get32_instance
-UNIMPLEMENTED art_quick_get64_instance
-UNIMPLEMENTED art_quick_get_obj_instance
-UNIMPLEMENTED art_quick_set32_static
-UNIMPLEMENTED art_quick_set64_static
-UNIMPLEMENTED art_quick_set_obj_static
-UNIMPLEMENTED art_quick_set32_instance
-UNIMPLEMENTED art_quick_set64_instance
-UNIMPLEMENTED art_quick_set_obj_instance
-UNIMPLEMENTED art_quick_resolve_string
-
 // Macro to facilitate adding new allocation entrypoints.
-// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
@@ -1183,7 +1266,6 @@
 .endm
 
 // Macro to facilitate adding new array allocation entrypoints.
-// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
@@ -1197,6 +1279,97 @@
 END \name
 .endm
 
+// Macros taking opportunity of code similarities for downcalls with referrer.
+
+// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
+.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    ldr    x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    mov    x2, xSELF                  // pass Thread::Current
+    mov    x3, sp                     // pass SP
+    bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
+
+// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
+.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    ldr    x2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    mov    x3, xSELF                  // pass Thread::Current
+    mov    x4, sp                     // pass SP
+    bl     \entrypoint
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
+
+// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
+.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    ldr    x3, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    mov    x4, xSELF                  // pass Thread::Current
+    mov    x5, sp                     // pass SP
+    bl     \entrypoint
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
+
+    /*
+     * Entry from managed code when static storage is uninitialized; this stub will run the
+     * class initializer and deliver an exception on error. On success the static storage base
+     * is returned.
+     */
+TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+
+UNIMPLEMENTED art_quick_initialize_type
+UNIMPLEMENTED art_quick_initialize_type_and_verify_access
+
+ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+
+TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+
+TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+
+THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+THREE_ARG_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+
+// This is separated out as the argument order is different.
+    .extern artSet64StaticFromCode
+ENTRY art_quick_set64_static
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    mov    x3, x1                     // Store value
+    ldr    x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    mov    x2, x3                     // Put value param
+    mov    x3, xSELF                  // pass Thread::Current
+    mov    x4, sp                     // pass SP
+    bl     artSet64StaticFromCode
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_W0_IS_ZERO_OR_DELIVER
+END art_quick_set64_static
+
+    /*
+     * Entry from managed code to resolve a string; this stub will allocate a String and deliver
+     * an exception on error. On success the String is returned. x0 holds the referring method,
+     * w1 holds the string index. The fast-path check for a hit in the strings cache has already
+     * been performed.
+     */
+TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALL_ALLOC_ENTRYPOINTS
 
@@ -1214,7 +1387,7 @@
     mov     x2, xSELF                   // pass Thread::Current
     mov     x3, sp                      // pass SP
     bl      artQuickProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, SP)
-    ldr  xSELF, [sp, #200]              // Restore self pointer.
+    ldr     xSELF, [sp, #200]           // Restore self pointer.
     ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]
     cbnz    x2, .Lexception_in_proxy    // success if no exception is pending
     RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME_NO_D0 // keep d0
@@ -1224,19 +1397,27 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_proxy_invoke_handler
 
-UNIMPLEMENTED art_quick_imt_conflict_trampoline
-
+    /*
+     * Called to resolve an imt conflict. x12 is a hidden argument that holds the target method's
+     * dex method index.
+     */
+ENTRY art_quick_imt_conflict_trampoline
+    ldr    x0, [sp, #0]                                // load caller Method*
+    ldr    w0, [x0, #METHOD_DEX_CACHE_METHODS_OFFSET]  // load dex_cache_resolved_methods
+    add    x0, x0, #OBJECT_ARRAY_DATA_OFFSET           // get starting address of data
+    ldr    w0, [x0, x12, lsl 2]                        // load the target method
+    b art_quick_invoke_interface_trampoline
+END art_quick_imt_conflict_trampoline
 
 ENTRY art_quick_resolution_trampoline
     SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
-    mov x19, x0           // save the called method
     mov x2, xSELF
     mov x3, sp
     bl artQuickResolutionTrampoline  // (called, receiver, Thread*, SP)
-    mov x9, x0            // Remember returned code pointer in x9.
-    mov x0, x19           // Restore the method, before x19 is restored to on-call value
+    cbz x0, 1f
+    mov x9, x0              // Remember returned code pointer in x9.
+    ldr x0, [sp, #0]        // artQuickResolutionTrampoline puts called method in *SP.
     RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
-    cbz x9, 1f
     br x9
 1:
     RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
@@ -1288,7 +1469,7 @@
  * | RDI/Method*       |  <- X0
  * #-------------------#
  * | local ref cookie  | // 4B
- * |   SIRT size       | // 4B
+ * | handle scope size | // 4B
  * #-------------------#
  * | JNI Call Stack    |
  * #-------------------#    <--- SP on native call
@@ -1315,7 +1496,7 @@
     .cfi_def_cfa_register x28
 
     // This looks the same, but is different: this will be updated to point to the bottom
-    // of the frame when the SIRT is inserted.
+    // of the frame when the handle scope is inserted.
     mov xFP, sp
 
     mov x8, #5120
@@ -1330,7 +1511,7 @@
     mov x1, xFP
     bl artQuickGenericJniTrampoline  // (Thread*, sp)
 
-    // Get the updated pointer. This is the bottom of the frame _with_ SIRT.
+    // Get the updated pointer. This is the bottom of the frame _with_ handle scope.
     ldr xFP, [sp]
     add x9, sp, #8
 
@@ -1430,9 +1611,119 @@
 UNIMPLEMENTED art_quick_instrumentation_entry
 UNIMPLEMENTED art_quick_instrumentation_exit
 UNIMPLEMENTED art_quick_deoptimize
-UNIMPLEMENTED art_quick_mul_long
-UNIMPLEMENTED art_quick_shl_long
-UNIMPLEMENTED art_quick_shr_long
-UNIMPLEMENTED art_quick_ushr_long
 UNIMPLEMENTED art_quick_indexof
-UNIMPLEMENTED art_quick_string_compareto
+
+   /*
+     * String's compareTo.
+     *
+     * TODO: Not very optimized.
+     *
+     * On entry:
+     *    x0:   this object pointer
+     *    x1:   comp object pointer
+     *
+     */
+    .extern __memcmp16
+ENTRY art_quick_string_compareto
+    mov    x2, x0         // x0 is return, use x2 for first input.
+    sub    x0, x2, x1     // Same string object?
+    cbnz   x0, 1f
+    ret
+1:                        // Different string objects.
+
+    ldr    w6, [x2, #STRING_OFFSET_OFFSET]
+    ldr    w5, [x1, #STRING_OFFSET_OFFSET]
+    ldr    w4, [x2, #STRING_COUNT_OFFSET]
+    ldr    w3, [x1, #STRING_COUNT_OFFSET]
+    ldr    w2, [x2, #STRING_VALUE_OFFSET]
+    ldr    w1, [x1, #STRING_VALUE_OFFSET]
+
+    /*
+     * Now:           CharArray*    Offset   Count
+     *    first arg      x2          w6        w4
+     *   second arg      x1          w5        w3
+     */
+
+    // x0 := str1.length(w4) - str2.length(w3). ldr zero-extended w3/w4 into x3/x4.
+    subs x0, x4, x3
+    // Min(count1, count2) into w3.
+    csel x3, x3, x4, ge
+
+    // Build pointer into string data.
+
+    // Add offset in array (substr etc.) (sign extend and << 1).
+    add x2, x2, w6, sxtw #1
+    add x1, x1, w5, sxtw #1
+
+    // Add offset in CharArray to array.
+    add x2, x2, #STRING_DATA_OFFSET
+    add x1, x1, #STRING_DATA_OFFSET
+
+    // Check for long string, do memcmp16 for them.
+    cmp w3, #28  // Constant from arm32.
+    bgt .Ldo_memcmp16
+
+    /*
+     * Now:
+     *   x2: *first string data
+     *   x1: *second string data
+     *   w3: iteration count
+     *   x0: return value if comparison equal
+     *   x4, x5, x6, x7: free
+     */
+
+    // Do a simple unrolled loop.
+.Lloop:
+    // At least two more elements?
+    subs w3, w3, #2
+    b.lt .Lremainder_or_done
+
+    ldrh w4, [x2], #2
+    ldrh w5, [x1], #2
+
+    ldrh w6, [x2], #2
+    ldrh w7, [x1], #2
+
+    subs w4, w4, w5
+    b.ne .Lw4_result
+
+    subs w6, w6, w7
+    b.ne .Lw6_result
+
+    b .Lloop
+
+.Lremainder_or_done:
+    adds w3, w3, #1
+    b.eq .Lremainder
+    ret
+
+.Lremainder:
+    ldrh w4, [x2], #2
+    ldrh w5, [x1], #2
+    subs w4, w4, w5
+    b.ne .Lw4_result
+    ret
+
+// Result is in w4
+.Lw4_result:
+    sxtw x0, w4
+    ret
+
+// Result is in w6
+.Lw6_result:
+    sxtw x0, w6
+    ret
+
+.Ldo_memcmp16:
+    str x0, [sp,#-16]!           // Save x0
+
+    mov x0, x2
+    uxtw x2, w3
+    bl __memcmp16
+
+    ldr x1, [sp], #16            // Restore old x0 = length diff
+
+    cmp x0, #0                   // Check the memcmp difference
+    csel x0, x0, x1, ne          // x0 := x0 != 0 ? x0 : x1
+    ret
+END art_quick_string_compareto
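
The lock and unlock fast paths added above encode the thin-lock state machine directly in the
lock word: the top two bits mark a non-thin state, the low 16 bits hold the owner thread id,
and recursive acquisitions are counted in steps of 65536. A C++ model of the same decisions,
purely for illustration (names are placeholders, not the runtime's LockWord API):

    #include <stdint.h>

    // Illustrative model of the fast path in art_quick_lock_object above;
    // not the runtime's actual lock word code.
    enum class ThinLockAction { kAcquired, kRecursive, kSlowPath };

    inline ThinLockAction ThinLockFastPath(uint32_t lock_word, uint32_t thread_id) {
      if (lock_word == 0) {
        return ThinLockAction::kAcquired;     // store thread_id with count 0
      }
      if ((lock_word >> 30) != 0) {
        return ThinLockAction::kSlowPath;     // not a thin lock, take the slow path
      }
      if (((lock_word ^ thread_id) & 0xFFFF) != 0) {
        return ThinLockAction::kSlowPath;     // another thread owns it (contention)
      }
      uint32_t new_word = lock_word + 65536;  // bump the recursion count
      if ((new_word >> 30) != 0) {
        return ThinLockAction::kSlowPath;     // count would overflow into the state bits
      }
      return ThinLockAction::kRecursive;      // store new_word; no stxr needed, we own it
    }
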
diff --git a/runtime/arch/arm64/quick_method_frame_info_arm64.h b/runtime/arch/arm64/quick_method_frame_info_arm64.h
new file mode 100644
index 0000000..cb830ac
--- /dev/null
+++ b/runtime/arch/arm64/quick_method_frame_info_arm64.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_ARM64_QUICK_METHOD_FRAME_INFO_ARM64_H_
+#define ART_RUNTIME_ARCH_ARM64_QUICK_METHOD_FRAME_INFO_ARM64_H_
+
+#include "quick/quick_method_frame_info.h"
+#include "registers_arm64.h"
+#include "runtime.h"  // for Runtime::CalleeSaveType.
+
+namespace art {
+namespace arm64 {
+
+// Callee saved registers
+static constexpr uint32_t kArm64CalleeSaveRefSpills =
+    (1 << art::arm64::X19) | (1 << art::arm64::X20) | (1 << art::arm64::X21) |
+    (1 << art::arm64::X22) | (1 << art::arm64::X23) | (1 << art::arm64::X24) |
+    (1 << art::arm64::X25) | (1 << art::arm64::X26) | (1 << art::arm64::X27) |
+    (1 << art::arm64::X28);
+// X0 is the method pointer. Not saved.
+static constexpr uint32_t kArm64CalleeSaveArgSpills =
+    (1 << art::arm64::X1) | (1 << art::arm64::X2) | (1 << art::arm64::X3) |
+    (1 << art::arm64::X4) | (1 << art::arm64::X5) | (1 << art::arm64::X6) |
+    (1 << art::arm64::X7);
+// TODO: This is conservative. Only ALL should include the thread register.
+// The thread register is not preserved by the aapcs64.
+// LR is always saved.
+static constexpr uint32_t kArm64CalleeSaveAllSpills =  0;  // (1 << art::arm64::LR);
+
+// Save callee-saved floating point registers. Rest are scratch/parameters.
+static constexpr uint32_t kArm64CalleeSaveFpArgSpills =
+    (1 << art::arm64::D0) | (1 << art::arm64::D1) | (1 << art::arm64::D2) |
+    (1 << art::arm64::D3) | (1 << art::arm64::D4) | (1 << art::arm64::D5) |
+    (1 << art::arm64::D6) | (1 << art::arm64::D7);
+static constexpr uint32_t kArm64CalleeSaveFpRefSpills =
+    (1 << art::arm64::D8)  | (1 << art::arm64::D9)  | (1 << art::arm64::D10) |
+    (1 << art::arm64::D11)  | (1 << art::arm64::D12)  | (1 << art::arm64::D13) |
+    (1 << art::arm64::D14)  | (1 << art::arm64::D15);
+static constexpr uint32_t kArm64FpAllSpills =
+    kArm64CalleeSaveFpArgSpills |
+    (1 << art::arm64::D16)  | (1 << art::arm64::D17) | (1 << art::arm64::D18) |
+    (1 << art::arm64::D19)  | (1 << art::arm64::D20) | (1 << art::arm64::D21) |
+    (1 << art::arm64::D22)  | (1 << art::arm64::D23) | (1 << art::arm64::D24) |
+    (1 << art::arm64::D25)  | (1 << art::arm64::D26) | (1 << art::arm64::D27) |
+    (1 << art::arm64::D28)  | (1 << art::arm64::D29) | (1 << art::arm64::D30) |
+    (1 << art::arm64::D31);
+
+constexpr uint32_t Arm64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
+  return kArm64CalleeSaveRefSpills |
+      (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveAll ? kArm64CalleeSaveAllSpills : 0) | (1 << art::arm64::FP) |
+      (1 << art::arm64::X18) | (1 << art::arm64::LR);
+}
+
+constexpr uint32_t Arm64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
+  return kArm64CalleeSaveFpRefSpills |
+      (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveFpArgSpills: 0) |
+      (type == Runtime::kSaveAll ? kArm64FpAllSpills : 0);
+}
+
+constexpr uint32_t Arm64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
+  return RoundUp((POPCOUNT(Arm64CalleeSaveCoreSpills(type)) /* gprs */ +
+                  POPCOUNT(Arm64CalleeSaveFpSpills(type)) /* fprs */ +
+                  1 /* Method* */) * kArm64PointerSize, kStackAlignment);
+}
+
+constexpr QuickMethodFrameInfo Arm64CalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
+  return QuickMethodFrameInfo(Arm64CalleeSaveFrameSize(type),
+                              Arm64CalleeSaveCoreSpills(type),
+                              Arm64CalleeSaveFpSpills(type));
+}
+
+}  // namespace arm64
+}  // namespace art
+
+#endif  // ART_RUNTIME_ARCH_ARM64_QUICK_METHOD_FRAME_INFO_ARM64_H_
diff --git a/runtime/arch/arm64/registers_arm64.h b/runtime/arch/arm64/registers_arm64.h
index 2503918..ea346e0 100644
--- a/runtime/arch/arm64/registers_arm64.h
+++ b/runtime/arch/arm64/registers_arm64.h
@@ -56,8 +56,8 @@
   X29 = 29,
   X30 = 30,
   X31 = 31,
-  TR  = 18,     // ART Thread Register - Needs to be one of the callee saved regs.
-  TR1 = 19,     // FIXME!
+  TR  = 18,     // ART Thread Register - Managed Runtime (Caller Saved Reg)
+  ETR = 19,     // ART Thread Register - External Calls  (Callee Saved Reg)
   IP0 = 16,     // Used as scratch by VIXL.
   IP1 = 17,     // Used as scratch by ART JNI Assembler.
   FP  = 29,
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index 0950e71..ad28891 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -16,8 +16,9 @@
 
 #include "context_mips.h"
 
-#include "mirror/art_method.h"
+#include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
+#include "quick/quick_method_frame_info.h"
 #include "stack.h"
 
 namespace art {
@@ -41,17 +42,15 @@
 
 void MipsContext::FillCalleeSaves(const StackVisitor& fr) {
   mirror::ArtMethod* method = fr.GetMethod();
-  uint32_t core_spills = method->GetCoreSpillMask();
-  uint32_t fp_core_spills = method->GetFpSpillMask();
-  size_t spill_count = POPCOUNT(core_spills);
-  size_t fp_spill_count = POPCOUNT(fp_core_spills);
-  size_t frame_size = method->GetFrameSizeInBytes();
+  const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
+  size_t spill_count = POPCOUNT(frame_info.CoreSpillMask());
+  size_t fp_spill_count = POPCOUNT(frame_info.FpSpillMask());
   if (spill_count > 0) {
     // Lowest number spill is farthest away, walk registers and fill into context.
     int j = 1;
     for (size_t i = 0; i < kNumberOfCoreRegisters; i++) {
-      if (((core_spills >> i) & 1) != 0) {
-        gprs_[i] = fr.CalleeSaveAddress(spill_count - j, frame_size);
+      if (((frame_info.CoreSpillMask() >> i) & 1) != 0) {
+        gprs_[i] = fr.CalleeSaveAddress(spill_count - j, frame_info.FrameSizeInBytes());
         j++;
       }
     }
@@ -60,8 +59,9 @@
     // Lowest number spill is farthest away, walk registers and fill into context.
     int j = 1;
     for (size_t i = 0; i < kNumberOfFRegisters; i++) {
-      if (((fp_core_spills >> i) & 1) != 0) {
-        fprs_[i] = fr.CalleeSaveAddress(spill_count + fp_spill_count - j, frame_size);
+      if (((frame_info.FpSpillMask() >> i) & 1) != 0) {
+        fprs_[i] = fr.CalleeSaveAddress(spill_count + fp_spill_count - j,
+                                        frame_info.FrameSizeInBytes());
         j++;
       }
     }
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 95fcd73..96e0afd 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -298,7 +298,11 @@
     l.s     $f29, 116($a1)
     l.s     $f30, 120($a1)
     l.s     $f31, 124($a1)
+    .set push
+    .set nomacro
+    .set noat
     lw      $at, 4($a0)
+    .set pop
     lw      $v0, 8($a0)
     lw      $v1, 12($a0)
     lw      $a1, 20($a0)
@@ -322,8 +326,6 @@
     lw      $s7, 92($a0)
     lw      $t8, 96($a0)
     lw      $t9, 100($a0)
-    lw      $k0, 104($a0)
-    lw      $k1, 108($a0)
     lw      $gp, 112($a0)
     lw      $sp, 116($a0)
     lw      $fp, 120($a0)
diff --git a/runtime/arch/mips/quick_method_frame_info_mips.h b/runtime/arch/mips/quick_method_frame_info_mips.h
new file mode 100644
index 0000000..2a8bcf0
--- /dev/null
+++ b/runtime/arch/mips/quick_method_frame_info_mips.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_MIPS_QUICK_METHOD_FRAME_INFO_MIPS_H_
+#define ART_RUNTIME_ARCH_MIPS_QUICK_METHOD_FRAME_INFO_MIPS_H_
+
+#include "quick/quick_method_frame_info.h"
+#include "registers_mips.h"
+#include "runtime.h"  // for Runtime::CalleeSaveType.
+
+namespace art {
+namespace mips {
+
+static constexpr uint32_t kMipsCalleeSaveRefSpills =
+    (1 << art::mips::S2) | (1 << art::mips::S3) | (1 << art::mips::S4) | (1 << art::mips::S5) |
+    (1 << art::mips::S6) | (1 << art::mips::S7) | (1 << art::mips::GP) | (1 << art::mips::FP);
+static constexpr uint32_t kMipsCalleeSaveArgSpills =
+    (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3);
+static constexpr uint32_t kMipsCalleeSaveAllSpills =
+    (1 << art::mips::S0) | (1 << art::mips::S1);
+
+constexpr uint32_t MipsCalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
+  return kMipsCalleeSaveRefSpills |
+      (type == Runtime::kRefsAndArgs ? kMipsCalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveAll ? kMipsCalleeSaveAllSpills : 0) | (1 << art::mips::RA);
+}
+
+constexpr uint32_t MipsCalleeSaveFrameSize(Runtime::CalleeSaveType type) {
+  return RoundUp((POPCOUNT(MipsCalleeSaveCoreSpills(type)) /* gprs */ +
+                  (type == Runtime::kRefsAndArgs ? 0 : 3) + 1 /* Method* */) *
+                 kMipsPointerSize, kStackAlignment);
+}
+
+constexpr QuickMethodFrameInfo MipsCalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
+  return QuickMethodFrameInfo(MipsCalleeSaveFrameSize(type),
+                              MipsCalleeSaveCoreSpills(type),
+                              0u);
+}
+
+}  // namespace mips
+}  // namespace art
+
+#endif  // ART_RUNTIME_ARCH_MIPS_QUICK_METHOD_FRAME_INFO_MIPS_H_
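
With per-architecture frame-info headers now available for ARM, ARM64, and MIPS, a runtime
that knows its instruction set can select the matching constexpr helpers with a simple switch.
A hedged sketch of that dispatch (the actual wiring in the runtime may differ):

    #include "arch/arm/quick_method_frame_info_arm.h"
    #include "arch/arm64/quick_method_frame_info_arm64.h"
    #include "arch/mips/quick_method_frame_info_mips.h"
    #include "instruction_set.h"

    namespace art {

    // Sketch only: pick the callee-save frame layout for the current ISA.
    inline QuickMethodFrameInfo CalleeSaveFrameInfoFor(InstructionSet isa,
                                                       Runtime::CalleeSaveType type) {
      switch (isa) {
        case kArm:
        case kThumb2:
          return arm::ArmCalleeSaveMethodFrameInfo(type);
        case kArm64:
          return arm64::Arm64CalleeSaveMethodFrameInfo(type);
        case kMips:
          return mips::MipsCalleeSaveMethodFrameInfo(type);
        default:
          return QuickMethodFrameInfo(0u, 0u, 0u);  // unsupported ISA in this sketch
      }
    }

    }  // namespace art
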
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 8fbca94..fac9883 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -15,6 +15,10 @@
  */
 
 #include "common_runtime_test.h"
+#include "mirror/art_field-inl.h"
+#include "mirror/art_method-inl.h"
+#include "mirror/class-inl.h"
+#include "mirror/string-inl.h"
 
 #include <cstdio>
 
@@ -31,10 +35,11 @@
     {
       // Create callee-save methods
       ScopedObjectAccess soa(Thread::Current());
+      runtime_->SetInstructionSet(kRuntimeISA);
       for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
         Runtime::CalleeSaveType type = Runtime::CalleeSaveType(i);
         if (!runtime_->HasCalleeSaveMethod(type)) {
-          runtime_->SetCalleeSaveMethod(runtime_->CreateCalleeSaveMethod(kRuntimeISA, type), type);
+          runtime_->SetCalleeSaveMethod(runtime_->CreateCalleeSaveMethod(type), type);
         }
       }
     }
@@ -47,42 +52,67 @@
         pair.first = "-Xmx4M";  // Smallest we can go.
       }
     }
+    options->push_back(std::make_pair("-Xint", nullptr));
   }
 
+  // Helper function needed since TEST_F makes a new class.
+  Thread::tls_ptr_sized_values* GetTlsPtr(Thread* self) {
+    return &self->tlsPtr_;
+  }
+
+ public:
   size_t Invoke3(size_t arg0, size_t arg1, size_t arg2, uintptr_t code, Thread* self) {
+    return Invoke3WithReferrer(arg0, arg1, arg2, code, self, nullptr);
+  }
+
+  // TODO: Set up a frame according to referrer's specs.
+  size_t Invoke3WithReferrer(size_t arg0, size_t arg1, size_t arg2, uintptr_t code, Thread* self,
+                             mirror::ArtMethod* referrer) {
     // Push a transition back into managed code onto the linked list in thread.
     ManagedStack fragment;
     self->PushManagedStackFragment(&fragment);
 
     size_t result;
+    size_t fpr_result = 0;
 #if defined(__i386__)
     // TODO: Set the thread?
     __asm__ __volatile__(
-        "pushl $0\n\t"               // Push nullptr to terminate quick stack
+        "pushl %[referrer]\n\t"     // Store referrer
         "call *%%edi\n\t"           // Call the stub
-        "addl $4, %%esp"               // Pop nullptr
+        "addl $4, %%esp"            // Pop referrer
         : "=a" (result)
           // Use the result from eax
-        : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code)
-          // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
-        : );  // clobber.
+          : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"r"(referrer)
+            // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
+            : );  // clobber.
     // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs,
     //       but compilation fails when declaring that.
 #elif defined(__arm__)
     __asm__ __volatile__(
         "push {r1-r12, lr}\n\t"     // Save state, 13*4B = 52B
         ".cfi_adjust_cfa_offset 52\n\t"
-        "sub sp, sp, #8\n\t"        // +8B, so 16B aligned with nullptr
-        ".cfi_adjust_cfa_offset 8\n\t"
-        "mov r0, %[arg0]\n\t"       // Set arg0-arg2
-        "mov r1, %[arg1]\n\t"       // TODO: Any way to use constraints like on x86?
-        "mov r2, %[arg2]\n\t"
-        // Use r9 last as we don't know whether it was used for arg0-arg2
-        "mov r9, #0\n\t"            // Push nullptr to terminate stack
         "push {r9}\n\t"
         ".cfi_adjust_cfa_offset 4\n\t"
-        "mov r9, %[self]\n\t"       // Set the thread
-        "blx %[code]\n\t"           // Call the stub
+        "mov r9, %[referrer]\n\n"
+        "str r9, [sp, #-8]!\n\t"   // Push referrer, +8B padding so 16B aligned
+        ".cfi_adjust_cfa_offset 8\n\t"
+        "ldr r9, [sp, #8]\n\t"
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #20\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #4]\n\t"
+        "str %[arg2], [sp, #8]\n\t"
+        "str %[code], [sp, #12]\n\t"
+        "str %[self], [sp, #16]\n\t"
+        "ldr r0, [sp]\n\t"
+        "ldr r1, [sp, #4]\n\t"
+        "ldr r2, [sp, #8]\n\t"
+        "ldr r3, [sp, #12]\n\t"
+        "ldr r9, [sp, #16]\n\t"
+        "add sp, sp, #20\n\t"
+
+        "blx r3\n\t"                // Call the stub
         "add sp, sp, #12\n\t"       // Pop nullptr and padding
         ".cfi_adjust_cfa_offset -12\n\t"
         "pop {r1-r12, lr}\n\t"      // Restore state
@@ -90,44 +120,385 @@
         "mov %[result], r0\n\t"     // Save the result
         : [result] "=r" (result)
           // Use the result from r0
-        : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self)
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer)
         : );  // clobber.
 #elif defined(__aarch64__)
     __asm__ __volatile__(
+        // Spill space for d8 - d15
+        "sub sp, sp, #64\n\t"
+        ".cfi_adjust_cfa_offset 64\n\t"
+        "stp d8, d9,   [sp]\n\t"
+        "stp d10, d11, [sp, #16]\n\t"
+        "stp d12, d13, [sp, #32]\n\t"
+        "stp d14, d15, [sp, #48]\n\t"
+
         "sub sp, sp, #48\n\t"          // Reserve stack space, 16B aligned
         ".cfi_adjust_cfa_offset 48\n\t"
-        "stp xzr, x1, [sp]\n\t"        // nullptr(end of quick stack), x1
-        "stp x2, x18, [sp, #16]\n\t"   // Save x2, x18(xSELF)
-        "str x30, [sp, #32]\n\t"       // Save xLR
-        "mov x0, %[arg0]\n\t"          // Set arg0-arg2
-        "mov x1, %[arg1]\n\t"          // TODO: Any way to use constraints like on x86?
-        "mov x2, %[arg2]\n\t"
-        // Use r18 last as we don't know whether it was used for arg0-arg2
-        "mov x18, %[self]\n\t"         // Set the thread
-        "blr %[code]\n\t"              // Call the stub
+        "stp %[referrer], x1, [sp]\n\t"// referrer, x1
+        "stp x2, x3,   [sp, #16]\n\t"   // Save x2, x3
+        "stp x18, x30, [sp, #32]\n\t"   // Save x18(xSELF), xLR
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #48\n\t"
+        ".cfi_adjust_cfa_offset 48\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #8]\n\t"
+        "str %[arg2], [sp, #16]\n\t"
+        "str %[code], [sp, #24]\n\t"
+        "str %[self], [sp, #32]\n\t"
+
+        // Now we definitely have x0-x3 free, use it to garble d8 - d15
+        "movk x0, #0xfad0\n\t"
+        "movk x0, #0xebad, lsl #16\n\t"
+        "movk x0, #0xfad0, lsl #32\n\t"
+        "movk x0, #0xebad, lsl #48\n\t"
+        "fmov d8, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d9, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d10, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d11, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d12, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d13, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d14, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d15, x0\n\t"
+
+        // Load call params
+        "ldr x0, [sp]\n\t"
+        "ldr x1, [sp, #8]\n\t"
+        "ldr x2, [sp, #16]\n\t"
+        "ldr x3, [sp, #24]\n\t"
+        "ldr x18, [sp, #32]\n\t"
+        "add sp, sp, #48\n\t"
+        ".cfi_adjust_cfa_offset -48\n\t"
+
+
+        "blr x3\n\t"              // Call the stub
+
+        // Test d8 - d15. We can use x1 and x2.
+        "movk x1, #0xfad0\n\t"
+        "movk x1, #0xebad, lsl #16\n\t"
+        "movk x1, #0xfad0, lsl #32\n\t"
+        "movk x1, #0xebad, lsl #48\n\t"
+        "fmov x2, d8\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d9\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d10\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d11\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d12\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d13\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d14\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d15\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+
+        "mov %[fpr_result], #0\n\t"
+
+        // Finish up.
+        "2:\n\t"
         "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
-        "ldp x18, x30, [sp, #24]\n\t"  // Restore xSELF, xLR
+        "ldp x3, x18, [sp, #24]\n\t"   // Restore x3, xSELF
+        "ldr x30, [sp, #40]\n\t"       // Restore xLR
         "add sp, sp, #48\n\t"          // Free stack space
         ".cfi_adjust_cfa_offset -48\n\t"
         "mov %[result], x0\n\t"        // Save the result
-        : [result] "=r" (result)
+
+        "ldp d8, d9,   [sp]\n\t"       // Restore d8 - d15
+        "ldp d10, d11, [sp, #16]\n\t"
+        "ldp d12, d13, [sp, #32]\n\t"
+        "ldp d14, d15, [sp, #48]\n\t"
+        "add sp, sp, #64\n\t"
+        ".cfi_adjust_cfa_offset -64\n\t"
+
+        "b 3f\n\t"                     // Goto end
+
+        // Failed fpr verification.
+        "1:\n\t"
+        "mov %[fpr_result], #1\n\t"
+        "b 2b\n\t"                     // Goto finish-up
+
+        // End
+        "3:\n\t"
+        : [result] "=r" (result), [fpr_result] "=r" (fpr_result)
           // Use the result from r0
-        : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self)
-        : "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
+        : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer)
+        : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
 #elif defined(__x86_64__)
     // Note: Uses the native convention
     // TODO: Set the thread?
     __asm__ __volatile__(
-        "pushq $0\n\t"                 // Push nullptr to terminate quick stack
-        "pushq $0\n\t"                 // 16B alignment padding
+        "pushq %[referrer]\n\t"        // Push referrer
+        "pushq (%%rsp)\n\t"             // & 16B alignment padding
         ".cfi_adjust_cfa_offset 16\n\t"
         "call *%%rax\n\t"              // Call the stub
-        "addq $16, %%rsp\n\t"              // Pop nullptr and padding
+        "addq $16, %%rsp\n\t"          // Pop nullptr and padding
         ".cfi_adjust_cfa_offset -16\n\t"
         : "=a" (result)
           // Use the result from rax
-        : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code)
-          // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
+          : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code), [referrer] "m"(referrer)
+            // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
+            : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");  // clobber all
+    // TODO: Should we clobber the other registers?
+#else
+    LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
+    result = 0;
+#endif
+    // Pop transition.
+    self->PopManagedStackFragment(fragment);
+
+    fp_result = fpr_result;
+    EXPECT_EQ(0U, fp_result);
+
+    return result;
+  }
+
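
The arm64 path also verifies that the stub preserves the callee-saved FP registers d8-d15. Stripped of the assembly, the check amounts to the following (a sketch; the marker value matches the movk sequence above):

    // Conceptual equivalent of the d8-d15 verification in the arm64 wrapper: each callee-saved
    // FP register is seeded with a distinct marker before the call and compared afterwards.
    constexpr uint64_t kFprSeed = 0xebadfad0ebadfad0ULL;  // value assembled by the movk sequence
    uint64_t actual[8];                                    // read back via fmov after the stub returns
    // ... seed d8..d15 with kFprSeed + 0 .. kFprSeed + 7, call the stub, read them back ...
    size_t fpr_result = 0;
    for (int i = 0; i < 8; ++i) {
      if (actual[i] != kFprSeed + i) {
        fpr_result = 1;  // a callee-saved FP register was clobbered
        break;
      }
    }
    EXPECT_EQ(0U, fpr_result);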
+  // TODO: Set up a frame according to referrer's specs.
+  size_t Invoke3WithReferrerAndHidden(size_t arg0, size_t arg1, size_t arg2, uintptr_t code,
+                                      Thread* self, mirror::ArtMethod* referrer, size_t hidden) {
+    // Push a transition back into managed code onto the linked list in thread.
+    ManagedStack fragment;
+    self->PushManagedStackFragment(&fragment);
+
+    size_t result;
+    size_t fpr_result = 0;
+#if defined(__i386__)
+    // TODO: Set the thread?
+    __asm__ __volatile__(
+        "movd %[hidden], %%xmm0\n\t"
+        "pushl %[referrer]\n\t"     // Store referrer
+        "call *%%edi\n\t"           // Call the stub
+        "addl $4, %%esp"            // Pop referrer
+        : "=a" (result)
+          // Use the result from eax
+          : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"m"(referrer), [hidden]"r"(hidden)
+            // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
+            : );  // clobber.
+    // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs,
+    //       but compilation fails when declaring that.
+#elif defined(__arm__)
+    __asm__ __volatile__(
+        "push {r1-r12, lr}\n\t"     // Save state, 13*4B = 52B
+        ".cfi_adjust_cfa_offset 52\n\t"
+        "push {r9}\n\t"
+        ".cfi_adjust_cfa_offset 4\n\t"
+        "mov r9, %[referrer]\n\n"
+        "str r9, [sp, #-8]!\n\t"   // Push referrer, +8B padding so 16B aligned
+        ".cfi_adjust_cfa_offset 8\n\t"
+        "ldr r9, [sp, #8]\n\t"
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #24\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #4]\n\t"
+        "str %[arg2], [sp, #8]\n\t"
+        "str %[code], [sp, #12]\n\t"
+        "str %[self], [sp, #16]\n\t"
+        "str %[hidden], [sp, #20]\n\t"
+        "ldr r0, [sp]\n\t"
+        "ldr r1, [sp, #4]\n\t"
+        "ldr r2, [sp, #8]\n\t"
+        "ldr r3, [sp, #12]\n\t"
+        "ldr r9, [sp, #16]\n\t"
+        "ldr r12, [sp, #20]\n\t"
+        "add sp, sp, #24\n\t"
+
+        "blx r3\n\t"                // Call the stub
+        "add sp, sp, #12\n\t"       // Pop nullptr and padding
+        ".cfi_adjust_cfa_offset -12\n\t"
+        "pop {r1-r12, lr}\n\t"      // Restore state
+        ".cfi_adjust_cfa_offset -52\n\t"
+        "mov %[result], r0\n\t"     // Save the result
+        : [result] "=r" (result)
+          // Use the result from r0
+          : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+            [referrer] "r"(referrer), [hidden] "r"(hidden)
+            : );  // clobber.
+#elif defined(__aarch64__)
+    __asm__ __volatile__(
+        // Spill space for d8 - d15
+        "sub sp, sp, #64\n\t"
+        ".cfi_adjust_cfa_offset 64\n\t"
+        "stp d8, d9,   [sp]\n\t"
+        "stp d10, d11, [sp, #16]\n\t"
+        "stp d12, d13, [sp, #32]\n\t"
+        "stp d14, d15, [sp, #48]\n\t"
+
+        "sub sp, sp, #48\n\t"          // Reserve stack space, 16B aligned
+        ".cfi_adjust_cfa_offset 48\n\t"
+        "stp %[referrer], x1, [sp]\n\t"// referrer, x1
+        "stp x2, x3,   [sp, #16]\n\t"   // Save x2, x3
+        "stp x18, x30, [sp, #32]\n\t"   // Save x18(xSELF), xLR
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #48\n\t"
+        ".cfi_adjust_cfa_offset 48\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #8]\n\t"
+        "str %[arg2], [sp, #16]\n\t"
+        "str %[code], [sp, #24]\n\t"
+        "str %[self], [sp, #32]\n\t"
+        "str %[hidden], [sp, #40]\n\t"
+
+        // Now we definitely have x0-x3 free, use it to garble d8 - d15
+        "movk x0, #0xfad0\n\t"
+        "movk x0, #0xebad, lsl #16\n\t"
+        "movk x0, #0xfad0, lsl #32\n\t"
+        "movk x0, #0xebad, lsl #48\n\t"
+        "fmov d8, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d9, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d10, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d11, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d12, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d13, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d14, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d15, x0\n\t"
+
+        // Load call params
+        "ldr x0, [sp]\n\t"
+        "ldr x1, [sp, #8]\n\t"
+        "ldr x2, [sp, #16]\n\t"
+        "ldr x3, [sp, #24]\n\t"
+        "ldr x18, [sp, #32]\n\t"
+        "ldr x12, [sp, #40]\n\t"
+        "add sp, sp, #48\n\t"
+        ".cfi_adjust_cfa_offset -48\n\t"
+
+
+        "blr x3\n\t"              // Call the stub
+
+        // Test d8 - d15. We can use x1 and x2.
+        "movk x1, #0xfad0\n\t"
+        "movk x1, #0xebad, lsl #16\n\t"
+        "movk x1, #0xfad0, lsl #32\n\t"
+        "movk x1, #0xebad, lsl #48\n\t"
+        "fmov x2, d8\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d9\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d10\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d11\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d12\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d13\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d14\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d15\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+
+        "mov %[fpr_result], #0\n\t"
+
+        // Finish up.
+        "2:\n\t"
+        "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
+        "ldp x3, x18, [sp, #24]\n\t"   // Restore x3, xSELF
+        "ldr x30, [sp, #40]\n\t"       // Restore xLR
+        "add sp, sp, #48\n\t"          // Free stack space
+        ".cfi_adjust_cfa_offset -48\n\t"
+        "mov %[result], x0\n\t"        // Save the result
+
+        "ldp d8, d9,   [sp]\n\t"       // Restore d8 - d15
+        "ldp d10, d11, [sp, #16]\n\t"
+        "ldp d12, d13, [sp, #32]\n\t"
+        "ldp d14, d15, [sp, #48]\n\t"
+        "add sp, sp, #64\n\t"
+        ".cfi_adjust_cfa_offset -64\n\t"
+
+        "b 3f\n\t"                     // Goto end
+
+        // Failed fpr verification.
+        "1:\n\t"
+        "mov %[fpr_result], #1\n\t"
+        "b 2b\n\t"                     // Goto finish-up
+
+        // End
+        "3:\n\t"
+        : [result] "=r" (result), [fpr_result] "=r" (fpr_result)
+        // Use the result from x0
+        : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer), [hidden] "r"(hidden)
+        : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
+#elif defined(__x86_64__)
+    // Note: Uses the native convention
+    // TODO: Set the thread?
+    __asm__ __volatile__(
+        "movq %[hidden], %%r9\n\t"     // No need to save r9, listed as clobbered
+        "movd %%r9, %%xmm0\n\t"
+        "pushq %[referrer]\n\t"        // Push referrer
+        "pushq (%%rsp)\n\t"            // & 16B alignment padding
+        ".cfi_adjust_cfa_offset 16\n\t"
+        "call *%%rax\n\t"              // Call the stub
+        "addq $16, %%rsp\n\t"          // Pop nullptr and padding
+        ".cfi_adjust_cfa_offset -16\n\t"
+        : "=a" (result)
+        // Use the result from rax
+        : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code), [referrer] "m"(referrer), [hidden] "m"(hidden)
+        // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
         : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");  // clobber all
     // TODO: Should we clobber the other registers?
 #else
@@ -136,8 +507,42 @@
 #endif
     // Pop transition.
     self->PopManagedStackFragment(fragment);
+
+    fp_result = fpr_result;
+    EXPECT_EQ(0U, fp_result);
+
     return result;
   }
+
+  // Method with 32b arg0, 64b arg1
+  size_t Invoke3UWithReferrer(size_t arg0, uint64_t arg1, uintptr_t code, Thread* self,
+                              mirror::ArtMethod* referrer) {
+#if defined(__x86_64__) || defined(__aarch64__)
+    // Just pass through.
+    return Invoke3WithReferrer(arg0, arg1, 0U, code, self, referrer);
+#else
+    // Need to split up arguments.
+    uint32_t lower = static_cast<uint32_t>(arg1 & 0xFFFFFFFF);
+    uint32_t upper = static_cast<uint32_t>((arg1 >> 32) & 0xFFFFFFFF);
+
+    return Invoke3WithReferrer(arg0, lower, upper, code, self, referrer);
+#endif
+  }
+
+  // Method with 32b arg0, 32b arg1, 64b arg2
+  size_t Invoke3UUWithReferrer(uint32_t arg0, uint32_t arg1, uint64_t arg2, uintptr_t code,
+                               Thread* self, mirror::ArtMethod* referrer) {
+#if defined(__x86_64__) || defined(__aarch64__)
+    // Just pass through.
+    return Invoke3WithReferrer(arg0, arg1, arg2, code, self, referrer);
+#else
+    // TODO: Needs 4-param invoke.
+    return 0;
+#endif
+  }
+
+ protected:
+  size_t fp_result;
 };
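
All of the tests below funnel through these helpers. A minimal usage sketch (obj is a placeholder object pointer here; art_quick_lock_object is one of the stubs declared further down):

    // Typical call pattern from a TEST_F body.
    Thread* self = Thread::Current();
    ScopedObjectAccess soa(self);                          // stubs run with the mutator lock held
    size_t result = Invoke3(reinterpret_cast<size_t>(obj),  // arg0
                            0U, 0U,                          // arg1, arg2 unused for this stub
                            reinterpret_cast<uintptr_t>(&art_quick_lock_object),
                            self);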
 
 
@@ -182,40 +587,61 @@
 #endif
 }
 
-
-#if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
 extern "C" void art_quick_lock_object(void);
 #endif
 
 TEST_F(StubTest, LockObject) {
-#if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  static constexpr size_t kThinLockLoops = 100;
+
   Thread* self = Thread::Current();
   // Create an object
   ScopedObjectAccess soa(self);
   // garbage is created during ClassLinker::Init
 
-  SirtRef<mirror::String> obj(soa.Self(),
-                              mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello, world!"));
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::String> obj(
+      hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello, world!")));
   LockWord lock = obj->GetLockWord(false);
   LockWord::LockState old_state = lock.GetState();
   EXPECT_EQ(LockWord::LockState::kUnlocked, old_state);
 
-  Invoke3(reinterpret_cast<size_t>(obj.get()), 0U, 0U,
+  Invoke3(reinterpret_cast<size_t>(obj.Get()), 0U, 0U,
           reinterpret_cast<uintptr_t>(&art_quick_lock_object), self);
 
   LockWord lock_after = obj->GetLockWord(false);
   LockWord::LockState new_state = lock_after.GetState();
   EXPECT_EQ(LockWord::LockState::kThinLocked, new_state);
+  EXPECT_EQ(lock_after.ThinLockCount(), 0U);  // Thin lock starts count at zero
 
-  Invoke3(reinterpret_cast<size_t>(obj.get()), 0U, 0U,
+  for (size_t i = 1; i < kThinLockLoops; ++i) {
+    Invoke3(reinterpret_cast<size_t>(obj.Get()), 0U, 0U,
+              reinterpret_cast<uintptr_t>(&art_quick_lock_object), self);
+
+    // Check we're at lock count i
+
+    LockWord l_inc = obj->GetLockWord(false);
+    LockWord::LockState l_inc_state = l_inc.GetState();
+    EXPECT_EQ(LockWord::LockState::kThinLocked, l_inc_state);
+    EXPECT_EQ(l_inc.ThinLockCount(), i);
+  }
+
+  // Force a fat lock by running identity hashcode to fill up lock word.
+  Handle<mirror::String> obj2(hs.NewHandle(
+      mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello, world!")));
+
+  obj2->IdentityHashCode();
+
+  Invoke3(reinterpret_cast<size_t>(obj2.Get()), 0U, 0U,
           reinterpret_cast<uintptr_t>(&art_quick_lock_object), self);
 
-  LockWord lock_after2 = obj->GetLockWord(false);
+  LockWord lock_after2 = obj2->GetLockWord(false);
   LockWord::LockState new_state2 = lock_after2.GetState();
-  EXPECT_EQ(LockWord::LockState::kThinLocked, new_state2);
+  EXPECT_EQ(LockWord::LockState::kFatLocked, new_state2);
+  EXPECT_NE(lock_after2.FatLockMonitor(), static_cast<Monitor*>(nullptr));
 
-  // TODO: Improve this test. Somehow force it to go to fat locked. But that needs another thread.
-
+  // Test done.
 #else
   LOG(INFO) << "Skipping lock_object as I don't know how to do that on " << kRuntimeISA;
   // Force-print to std::cout so it's also outside the logcat.
@@ -224,6 +650,172 @@
 }
 
 
+class RandGen {
+ public:
+  explicit RandGen(uint32_t seed) : val_(seed) {}
+
+  uint32_t next() {
+    val_ = val_ * 48271 % 2147483647 + 13;
+    return val_;
+  }
+
+  uint32_t val_;
+};
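
RandGen is a tiny linear-congruential-style generator (multiplier 48271, modulus 2^31 - 1, plus a constant), so the stress test below is deterministic and failures are reproducible. Sketch of how it is used:

    RandGen r(0x1234);                    // fixed seed -> same sequence every run
    size_t index = r.next() % 10;         // pick one of the ten lock objects
    bool do_lock = (r.next() % 2) == 0;   // decide between lock and unlock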
+
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_lock_object(void);
+extern "C" void art_quick_unlock_object(void);
+#endif
+
+// NO_THREAD_SAFETY_ANALYSIS as we do not want to grab exclusive mutator lock for MonitorInfo.
+static void TestUnlockObject(StubTest* test) NO_THREAD_SAFETY_ANALYSIS {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  static constexpr size_t kThinLockLoops = 100;
+
+  Thread* self = Thread::Current();
+  // Create an object
+  ScopedObjectAccess soa(self);
+  // garbage is created during ClassLinker::Init
+  static constexpr size_t kNumberOfLocks = 10;  // Number of objects = number of locks.
+  StackHandleScope<kNumberOfLocks + 1> hs(self);
+  Handle<mirror::String> obj(
+      hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello, world!")));
+  LockWord lock = obj->GetLockWord(false);
+  LockWord::LockState old_state = lock.GetState();
+  EXPECT_EQ(LockWord::LockState::kUnlocked, old_state);
+
+  test->Invoke3(reinterpret_cast<size_t>(obj.Get()), 0U, 0U,
+                reinterpret_cast<uintptr_t>(&art_quick_unlock_object), self);
+  // This should be an illegal monitor state.
+  EXPECT_TRUE(self->IsExceptionPending());
+  self->ClearException();
+
+  LockWord lock_after = obj->GetLockWord(false);
+  LockWord::LockState new_state = lock_after.GetState();
+  EXPECT_EQ(LockWord::LockState::kUnlocked, new_state);
+
+  test->Invoke3(reinterpret_cast<size_t>(obj.Get()), 0U, 0U,
+                reinterpret_cast<uintptr_t>(&art_quick_lock_object), self);
+
+  LockWord lock_after2 = obj->GetLockWord(false);
+  LockWord::LockState new_state2 = lock_after2.GetState();
+  EXPECT_EQ(LockWord::LockState::kThinLocked, new_state2);
+
+  test->Invoke3(reinterpret_cast<size_t>(obj.Get()), 0U, 0U,
+                reinterpret_cast<uintptr_t>(&art_quick_unlock_object), self);
+
+  LockWord lock_after3 = obj->GetLockWord(false);
+  LockWord::LockState new_state3 = lock_after3.GetState();
+  EXPECT_EQ(LockWord::LockState::kUnlocked, new_state3);
+
+  // Stress test:
+  // Keep a number of objects and their locks in flight. Randomly lock or unlock one of them in
+  // each step.
+
+  RandGen r(0x1234);
+
+  constexpr size_t kIterations = 10000;  // Number of iterations
+  constexpr size_t kMoveToFat = 1000;     // Chance of 1:kMoveToFat to make a lock fat.
+
+  size_t counts[kNumberOfLocks];
+  bool fat[kNumberOfLocks];  // Whether a lock should be thin or fat.
+  Handle<mirror::String> objects[kNumberOfLocks];
+
+  // Initialize = allocate.
+  for (size_t i = 0; i < kNumberOfLocks; ++i) {
+    counts[i] = 0;
+    fat[i] = false;
+    objects[i] = hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), ""));
+  }
+
+  for (size_t i = 0; i < kIterations; ++i) {
+    // Select which lock to update.
+    size_t index = r.next() % kNumberOfLocks;
+
+    // Make lock fat?
+    if (!fat[index] && (r.next() % kMoveToFat == 0)) {
+      fat[index] = true;
+      objects[index]->IdentityHashCode();
+
+      LockWord lock_iter = objects[index]->GetLockWord(false);
+      LockWord::LockState iter_state = lock_iter.GetState();
+      if (counts[index] == 0) {
+        EXPECT_EQ(LockWord::LockState::kHashCode, iter_state);
+      } else {
+        EXPECT_EQ(LockWord::LockState::kFatLocked, iter_state);
+      }
+    } else {
+      bool lock;  // Whether to lock or unlock in this step.
+      if (counts[index] == 0) {
+        lock = true;
+      } else if (counts[index] == kThinLockLoops) {
+        lock = false;
+      } else {
+        // Randomly.
+        lock = r.next() % 2 == 0;
+      }
+
+      if (lock) {
+        test->Invoke3(reinterpret_cast<size_t>(objects[index].Get()), 0U, 0U,
+                       reinterpret_cast<uintptr_t>(&art_quick_lock_object), self);
+        counts[index]++;
+      } else {
+        test->Invoke3(reinterpret_cast<size_t>(objects[index].Get()), 0U, 0U,
+                      reinterpret_cast<uintptr_t>(&art_quick_unlock_object), self);
+        counts[index]--;
+      }
+
+      EXPECT_FALSE(self->IsExceptionPending());
+
+      // Check the new state.
+      LockWord lock_iter = objects[index]->GetLockWord(true);
+      LockWord::LockState iter_state = lock_iter.GetState();
+      if (fat[index]) {
+        // Abuse MonitorInfo.
+        EXPECT_EQ(LockWord::LockState::kFatLocked, iter_state) << index;
+        MonitorInfo info(objects[index].Get());
+        EXPECT_EQ(counts[index], info.entry_count_) << index;
+      } else {
+        if (counts[index] > 0) {
+          EXPECT_EQ(LockWord::LockState::kThinLocked, iter_state);
+          EXPECT_EQ(counts[index] - 1, lock_iter.ThinLockCount());
+        } else {
+          EXPECT_EQ(LockWord::LockState::kUnlocked, iter_state);
+        }
+      }
+    }
+  }
+
+  // Unlock the remaining count times and then check it's unlocked. Then deallocate.
+  // Go in reverse order to correctly handle Handles.
+  for (size_t i = 0; i < kNumberOfLocks; ++i) {
+    size_t index = kNumberOfLocks - 1 - i;
+    size_t count = counts[index];
+    while (count > 0) {
+      test->Invoke3(reinterpret_cast<size_t>(objects[index].Get()), 0U, 0U,
+                    reinterpret_cast<uintptr_t>(&art_quick_unlock_object), self);
+      count--;
+    }
+
+    LockWord lock_after4 = objects[index]->GetLockWord(false);
+    LockWord::LockState new_state4 = lock_after4.GetState();
+    EXPECT_TRUE(LockWord::LockState::kUnlocked == new_state4
+                || LockWord::LockState::kFatLocked == new_state4);
+  }
+
+  // Test done.
+#else
+  LOG(INFO) << "Skipping unlock_object as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping unlock_object as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+TEST_F(StubTest, UnlockObject) {
+  TestUnlockObject(this);
+}
+
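Both lock tests force inflation the same way; the idea, condensed from the code above:

    // Installing an identity hash occupies the lock word (kHashCode state), so the next lock
    // attempt cannot thin-lock and must inflate to a fat Monitor.
    obj2->IdentityHashCode();
    Invoke3(reinterpret_cast<size_t>(obj2.Get()), 0U, 0U,
            reinterpret_cast<uintptr_t>(&art_quick_lock_object), self);
    EXPECT_EQ(LockWord::LockState::kFatLocked, obj2->GetLockWord(false).GetState());
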
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
 extern "C" void art_quick_check_cast(void);
 #endif
@@ -235,31 +827,32 @@
   ScopedObjectAccess soa(self);
   // garbage is created during ClassLinker::Init
 
-  SirtRef<mirror::Class> c(soa.Self(), class_linker_->FindSystemClass(soa.Self(),
-                                                                          "[Ljava/lang/Object;"));
-  SirtRef<mirror::Class> c2(soa.Self(), class_linker_->FindSystemClass(soa.Self(),
-                                                                            "[Ljava/lang/String;"));
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::Class> c(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;")));
+  Handle<mirror::Class> c2(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/String;")));
 
   EXPECT_FALSE(self->IsExceptionPending());
 
-  Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(c.get()), 0U,
+  Invoke3(reinterpret_cast<size_t>(c.Get()), reinterpret_cast<size_t>(c.Get()), 0U,
           reinterpret_cast<uintptr_t>(&art_quick_check_cast), self);
 
   EXPECT_FALSE(self->IsExceptionPending());
 
-  Invoke3(reinterpret_cast<size_t>(c2.get()), reinterpret_cast<size_t>(c2.get()), 0U,
+  Invoke3(reinterpret_cast<size_t>(c2.Get()), reinterpret_cast<size_t>(c2.Get()), 0U,
           reinterpret_cast<uintptr_t>(&art_quick_check_cast), self);
 
   EXPECT_FALSE(self->IsExceptionPending());
 
-  Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(c2.get()), 0U,
+  Invoke3(reinterpret_cast<size_t>(c.Get()), reinterpret_cast<size_t>(c2.Get()), 0U,
           reinterpret_cast<uintptr_t>(&art_quick_check_cast), self);
 
   EXPECT_FALSE(self->IsExceptionPending());
 
   // TODO: Make the following work. But that would require correct managed frames.
 
-  Invoke3(reinterpret_cast<size_t>(c2.get()), reinterpret_cast<size_t>(c.get()), 0U,
+  Invoke3(reinterpret_cast<size_t>(c2.Get()), reinterpret_cast<size_t>(c.Get()), 0U,
           reinterpret_cast<uintptr_t>(&art_quick_check_cast), self);
 
   EXPECT_TRUE(self->IsExceptionPending());
@@ -287,23 +880,22 @@
   ScopedObjectAccess soa(self);
   // garbage is created during ClassLinker::Init
 
-  SirtRef<mirror::Class> c(soa.Self(), class_linker_->FindSystemClass(soa.Self(),
-                                                                            "Ljava/lang/Object;"));
-  SirtRef<mirror::Class> c2(soa.Self(), class_linker_->FindSystemClass(soa.Self(),
-                                                                            "Ljava/lang/String;"));
-  SirtRef<mirror::Class> ca(soa.Self(), class_linker_->FindSystemClass(soa.Self(),
-                                                                            "[Ljava/lang/String;"));
+  StackHandleScope<5> hs(soa.Self());
+  Handle<mirror::Class> c(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;")));
+  Handle<mirror::Class> ca(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/String;")));
 
   // Build a string array of size 1
-  SirtRef<mirror::ObjectArray<mirror::Object> > array(soa.Self(),
-            mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), ca.get(), 10));
+  Handle<mirror::ObjectArray<mirror::Object>> array(
+      hs.NewHandle(mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), ca.Get(), 10)));
 
   // Build a string -> should be assignable
-  SirtRef<mirror::Object> str_obj(soa.Self(),
-                                  mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello, world!"));
+  Handle<mirror::String> str_obj(
+      hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello, world!")));
 
   // Build a generic object -> should fail assigning
-  SirtRef<mirror::Object> obj_obj(soa.Self(), c->AllocObject(soa.Self()));
+  Handle<mirror::Object> obj_obj(hs.NewHandle(c->AllocObject(soa.Self())));
 
   // Play with it...
 
@@ -312,51 +904,51 @@
 
   EXPECT_FALSE(self->IsExceptionPending());
 
-  Invoke3(reinterpret_cast<size_t>(array.get()), 0U, reinterpret_cast<size_t>(str_obj.get()),
+  Invoke3(reinterpret_cast<size_t>(array.Get()), 0U, reinterpret_cast<size_t>(str_obj.Get()),
           reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
 
   EXPECT_FALSE(self->IsExceptionPending());
-  EXPECT_EQ(str_obj.get(), array->Get(0));
+  EXPECT_EQ(str_obj.Get(), array->Get(0));
 
-  Invoke3(reinterpret_cast<size_t>(array.get()), 1U, reinterpret_cast<size_t>(str_obj.get()),
+  Invoke3(reinterpret_cast<size_t>(array.Get()), 1U, reinterpret_cast<size_t>(str_obj.Get()),
           reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
 
   EXPECT_FALSE(self->IsExceptionPending());
-  EXPECT_EQ(str_obj.get(), array->Get(1));
+  EXPECT_EQ(str_obj.Get(), array->Get(1));
 
-  Invoke3(reinterpret_cast<size_t>(array.get()), 2U, reinterpret_cast<size_t>(str_obj.get()),
+  Invoke3(reinterpret_cast<size_t>(array.Get()), 2U, reinterpret_cast<size_t>(str_obj.Get()),
           reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
 
   EXPECT_FALSE(self->IsExceptionPending());
-  EXPECT_EQ(str_obj.get(), array->Get(2));
+  EXPECT_EQ(str_obj.Get(), array->Get(2));
 
-  Invoke3(reinterpret_cast<size_t>(array.get()), 3U, reinterpret_cast<size_t>(str_obj.get()),
+  Invoke3(reinterpret_cast<size_t>(array.Get()), 3U, reinterpret_cast<size_t>(str_obj.Get()),
           reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
 
   EXPECT_FALSE(self->IsExceptionPending());
-  EXPECT_EQ(str_obj.get(), array->Get(3));
+  EXPECT_EQ(str_obj.Get(), array->Get(3));
 
   // 1.2) Assign null to array[0..3]
 
-  Invoke3(reinterpret_cast<size_t>(array.get()), 0U, reinterpret_cast<size_t>(nullptr),
+  Invoke3(reinterpret_cast<size_t>(array.Get()), 0U, reinterpret_cast<size_t>(nullptr),
           reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
 
   EXPECT_FALSE(self->IsExceptionPending());
   EXPECT_EQ(nullptr, array->Get(0));
 
-  Invoke3(reinterpret_cast<size_t>(array.get()), 1U, reinterpret_cast<size_t>(nullptr),
+  Invoke3(reinterpret_cast<size_t>(array.Get()), 1U, reinterpret_cast<size_t>(nullptr),
           reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
 
   EXPECT_FALSE(self->IsExceptionPending());
   EXPECT_EQ(nullptr, array->Get(1));
 
-  Invoke3(reinterpret_cast<size_t>(array.get()), 2U, reinterpret_cast<size_t>(nullptr),
+  Invoke3(reinterpret_cast<size_t>(array.Get()), 2U, reinterpret_cast<size_t>(nullptr),
           reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
 
   EXPECT_FALSE(self->IsExceptionPending());
   EXPECT_EQ(nullptr, array->Get(2));
 
-  Invoke3(reinterpret_cast<size_t>(array.get()), 3U, reinterpret_cast<size_t>(nullptr),
+  Invoke3(reinterpret_cast<size_t>(array.Get()), 3U, reinterpret_cast<size_t>(nullptr),
           reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
 
   EXPECT_FALSE(self->IsExceptionPending());
@@ -368,7 +960,7 @@
   // 2.1) Array = null
   // TODO: Throwing NPE needs actual DEX code
 
-//  Invoke3(reinterpret_cast<size_t>(nullptr), 0U, reinterpret_cast<size_t>(str_obj.get()),
+//  Invoke3(reinterpret_cast<size_t>(nullptr), 0U, reinterpret_cast<size_t>(str_obj.Get()),
 //          reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
 //
 //  EXPECT_TRUE(self->IsExceptionPending());
@@ -376,8 +968,8 @@
 
   // 2.2) Index < 0
 
-  Invoke3(reinterpret_cast<size_t>(array.get()), static_cast<size_t>(-1),
-          reinterpret_cast<size_t>(str_obj.get()),
+  Invoke3(reinterpret_cast<size_t>(array.Get()), static_cast<size_t>(-1),
+          reinterpret_cast<size_t>(str_obj.Get()),
           reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
 
   EXPECT_TRUE(self->IsExceptionPending());
@@ -385,7 +977,7 @@
 
   // 2.3) Index > 0
 
-  Invoke3(reinterpret_cast<size_t>(array.get()), 10U, reinterpret_cast<size_t>(str_obj.get()),
+  Invoke3(reinterpret_cast<size_t>(array.Get()), 10U, reinterpret_cast<size_t>(str_obj.Get()),
           reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
 
   EXPECT_TRUE(self->IsExceptionPending());
@@ -393,7 +985,7 @@
 
   // 3) Failure cases (obj into str[])
 
-  Invoke3(reinterpret_cast<size_t>(array.get()), 0U, reinterpret_cast<size_t>(obj_obj.get()),
+  Invoke3(reinterpret_cast<size_t>(array.Get()), 0U, reinterpret_cast<size_t>(obj_obj.Get()),
           reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
 
   EXPECT_TRUE(self->IsExceptionPending());
@@ -407,13 +999,6 @@
 #endif
 }
 
-
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
-extern "C" void art_quick_alloc_object_rosalloc(void);
-extern "C" void art_quick_alloc_object_resolved_rosalloc(void);
-extern "C" void art_quick_alloc_object_initialized_rosalloc(void);
-#endif
-
 TEST_F(StubTest, AllocObject) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
@@ -425,53 +1010,53 @@
   ScopedObjectAccess soa(self);
   // garbage is created during ClassLinker::Init
 
-  SirtRef<mirror::Class> c(soa.Self(), class_linker_->FindSystemClass(soa.Self(),
-                                                                      "Ljava/lang/Object;"));
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::Class> c(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;")));
 
   // Play with it...
 
   EXPECT_FALSE(self->IsExceptionPending());
-
   {
     // Use an arbitrary method from c to use as referrer
     size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
                             reinterpret_cast<size_t>(c->GetVirtualMethod(0)),  // arbitrary
                             0U,
-                            reinterpret_cast<uintptr_t>(&art_quick_alloc_object_rosalloc),
+                            reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocObject),
                             self);
 
     EXPECT_FALSE(self->IsExceptionPending());
     EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
     mirror::Object* obj = reinterpret_cast<mirror::Object*>(result);
-    EXPECT_EQ(c.get(), obj->GetClass());
+    EXPECT_EQ(c.Get(), obj->GetClass());
     VerifyObject(obj);
   }
 
   {
     // We can use nullptr in the second argument as we do not need a method here (not used in
     // resolved/initialized cases)
-    size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr), 0U,
-                            reinterpret_cast<uintptr_t>(&art_quick_alloc_object_resolved_rosalloc),
+    size_t result = Invoke3(reinterpret_cast<size_t>(c.Get()), reinterpret_cast<size_t>(nullptr), 0U,
+                            reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocObjectResolved),
                             self);
 
     EXPECT_FALSE(self->IsExceptionPending());
     EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
     mirror::Object* obj = reinterpret_cast<mirror::Object*>(result);
-    EXPECT_EQ(c.get(), obj->GetClass());
+    EXPECT_EQ(c.Get(), obj->GetClass());
     VerifyObject(obj);
   }
 
   {
     // We can use nullptr in the second argument as we do not need a method here (not used in
     // resolved/initialized cases)
-    size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr), 0U,
-                            reinterpret_cast<uintptr_t>(&art_quick_alloc_object_initialized_rosalloc),
+    size_t result = Invoke3(reinterpret_cast<size_t>(c.Get()), reinterpret_cast<size_t>(nullptr), 0U,
+                            reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocObjectInitialized),
                             self);
 
     EXPECT_FALSE(self->IsExceptionPending());
     EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
     mirror::Object* obj = reinterpret_cast<mirror::Object*>(result);
-    EXPECT_EQ(c.get(), obj->GetClass());
+    EXPECT_EQ(c.Get(), obj->GetClass());
     VerifyObject(obj);
   }
 
@@ -482,19 +1067,21 @@
     Runtime::Current()->GetHeap()->SetIdealFootprint(1 * GB);
 
     // Array helps to fill memory faster.
-    SirtRef<mirror::Class> ca(soa.Self(), class_linker_->FindSystemClass(soa.Self(),
-                                                                         "[Ljava/lang/Object;"));
-    std::vector<SirtRef<mirror::Object>*> sirt_refs;
+    Handle<mirror::Class> ca(
+        hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;")));
+
+    // Use arbitrary large amount for now.
+    static const size_t kMaxHandles = 1000000;
+    std::unique_ptr<StackHandleScope<kMaxHandles>> hsp(new StackHandleScope<kMaxHandles>(self));
+
+    std::vector<Handle<mirror::Object>> handles;
     // Start allocating with 128K
     size_t length = 128 * KB / 4;
     while (length > 10) {
-      SirtRef<mirror::Object>* ref = new SirtRef<mirror::Object>(soa.Self(),
-                                              mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(),
-                                                                                         ca.get(),
-                                                                                         length/4));
-      if (self->IsExceptionPending() || ref->get() == nullptr) {
+      Handle<mirror::Object> h(hsp->NewHandle<mirror::Object>(
+          mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), ca.Get(), length / 4)));
+      if (self->IsExceptionPending() || h.Get() == nullptr) {
         self->ClearException();
-        delete ref;
 
         // Try a smaller length
         length = length / 8;
@@ -504,38 +1091,26 @@
           length = mem / 8;
         }
       } else {
-        sirt_refs.push_back(ref);
+        handles.push_back(h);
       }
     }
-    LOG(DEBUG) << "Used " << sirt_refs.size() << " arrays to fill space.";
+    LOG(INFO) << "Used " << handles.size() << " arrays to fill space.";
 
     // Allocate simple objects till it fails.
     while (!self->IsExceptionPending()) {
-      SirtRef<mirror::Object>* ref = new SirtRef<mirror::Object>(soa.Self(),
-                                                                 c->AllocObject(soa.Self()));
-      if (!self->IsExceptionPending() && ref->get() != nullptr) {
-        sirt_refs.push_back(ref);
-      } else {
-        delete ref;
+      Handle<mirror::Object> h = hsp->NewHandle(c->AllocObject(soa.Self()));
+      if (!self->IsExceptionPending() && h.Get() != nullptr) {
+        handles.push_back(h);
       }
     }
     self->ClearException();
 
-    size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr), 0U,
-                            reinterpret_cast<uintptr_t>(&art_quick_alloc_object_initialized_rosalloc),
+    size_t result = Invoke3(reinterpret_cast<size_t>(c.Get()), reinterpret_cast<size_t>(nullptr), 0U,
+                            reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocObjectInitialized),
                             self);
-
     EXPECT_TRUE(self->IsExceptionPending());
     self->ClearException();
     EXPECT_EQ(reinterpret_cast<size_t>(nullptr), result);
-
-    // Release all the allocated objects.
-    // Need to go backward to release SirtRef in the right order.
-    auto it = sirt_refs.rbegin();
-    auto end = sirt_refs.rend();
-    for (; it != end; ++it) {
-      delete *it;
-    }
   }
 
   // Tests done.
@@ -546,12 +1121,6 @@
 #endif
 }
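With the allocation entrypoints now read out of the thread's TLS instead of hard-coded rosalloc symbols, the test exercises whichever allocator the runtime was started with. Sketch of the lookup (type_idx and referrer_method are placeholders):

    // Resolve the allocation entrypoint dynamically rather than naming a specific allocator stub.
    uintptr_t alloc_entry =
        reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocObject);
    size_t result = Invoke3(type_idx, reinterpret_cast<size_t>(referrer_method), 0U,
                            alloc_entry, self);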
 
-
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
-extern "C" void art_quick_alloc_array_rosalloc(void);
-extern "C" void art_quick_alloc_array_resolved_rosalloc(void);
-#endif
-
 TEST_F(StubTest, AllocObjectArray) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
@@ -563,49 +1132,49 @@
   ScopedObjectAccess soa(self);
   // garbage is created during ClassLinker::Init
 
-  SirtRef<mirror::Class> c(soa.Self(), class_linker_->FindSystemClass(soa.Self(),
-                                                                        "[Ljava/lang/Object;"));
+  StackHandleScope<2> hs(self);
+  Handle<mirror::Class> c(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;")));
 
   // Needed to have a linked method.
-  SirtRef<mirror::Class> c_obj(soa.Self(), class_linker_->FindSystemClass(soa.Self(),
-                                                                          "Ljava/lang/Object;"));
+  Handle<mirror::Class> c_obj(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;")));
 
   // Play with it...
 
   EXPECT_FALSE(self->IsExceptionPending());
-/*
- * For some reason this does not work, as the type_idx is artificial and outside what the
- * resolved types of c_obj allow...
- *
-  {
+
+  // For some reason this does not work, as the type_idx is artificial and outside what the
+  // resolved types of c_obj allow...
+
+  if (false) {
     // Use an arbitrary method from c to use as referrer
     size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
                             reinterpret_cast<size_t>(c_obj->GetVirtualMethod(0)),  // arbitrary
                             10U,
-                            reinterpret_cast<uintptr_t>(&art_quick_alloc_array_rosalloc),
+                            reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocArray),
                             self);
 
     EXPECT_FALSE(self->IsExceptionPending());
     EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
     mirror::Array* obj = reinterpret_cast<mirror::Array*>(result);
-    EXPECT_EQ(c.get(), obj->GetClass());
+    EXPECT_EQ(c.Get(), obj->GetClass());
     VerifyObject(obj);
     EXPECT_EQ(obj->GetLength(), 10);
   }
-*/
+
   {
     // We can use nullptr in the second argument as we do not need a method here (not used in
     // resolved/initialized cases)
-    size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr), 10U,
-                            reinterpret_cast<uintptr_t>(&art_quick_alloc_array_resolved_rosalloc),
+    size_t result = Invoke3(reinterpret_cast<size_t>(c.Get()), reinterpret_cast<size_t>(nullptr), 10U,
+                            reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocArrayResolved),
                             self);
-
-    EXPECT_FALSE(self->IsExceptionPending());
+    EXPECT_FALSE(self->IsExceptionPending()) << PrettyTypeOf(self->GetException(nullptr));
     EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
     mirror::Object* obj = reinterpret_cast<mirror::Object*>(result);
     EXPECT_TRUE(obj->IsArrayInstance());
     EXPECT_TRUE(obj->IsObjectArray());
-    EXPECT_EQ(c.get(), obj->GetClass());
+    EXPECT_EQ(c.Get(), obj->GetClass());
     VerifyObject(obj);
     mirror::Array* array = reinterpret_cast<mirror::Array*>(result);
     EXPECT_EQ(array->GetLength(), 10);
@@ -615,9 +1184,9 @@
 
   // Out-of-memory.
   {
-    size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr),
+    size_t result = Invoke3(reinterpret_cast<size_t>(c.Get()), reinterpret_cast<size_t>(nullptr),
                             GB,  // that should fail...
-                            reinterpret_cast<uintptr_t>(&art_quick_alloc_array_resolved_rosalloc),
+                            reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocArrayResolved),
                             self);
 
     EXPECT_TRUE(self->IsExceptionPending());
@@ -634,14 +1203,14 @@
 }
 
 
-#if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
 extern "C" void art_quick_string_compareto(void);
 #endif
 
 TEST_F(StubTest, StringCompareTo) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
-#if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
   // TODO: Check the "Unresolved" allocation stubs
 
   Thread* self = Thread::Current();
@@ -650,38 +1219,55 @@
 
   // Create some strings
   // Use array so we can index into it and use a matrix for expected results
-  constexpr size_t string_count = 7;
-  const char* c[string_count] = { "", "", "a", "aa", "ab", "aac", "aac" };
+  // Setup: The first half is standard. The second half uses a non-zero offset.
+  // TODO: Shared backing arrays.
+  static constexpr size_t kBaseStringCount  = 7;
+  const char* c[kBaseStringCount] = { "", "", "a", "aa", "ab", "aac", "aac", };
 
-  SirtRef<mirror::String>* s[string_count];
+  static constexpr size_t kStringCount = 2 * kBaseStringCount;
 
-  for (size_t i = 0; i < string_count; ++i) {
-    s[i] = new SirtRef<mirror::String>(soa.Self(), mirror::String::AllocFromModifiedUtf8(soa.Self(),
-                                                                                         c[i]));
+  StackHandleScope<kStringCount> hs(self);
+  Handle<mirror::String> s[kStringCount];
+
+  for (size_t i = 0; i < kBaseStringCount; ++i) {
+    s[i] = hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), c[i]));
+  }
+
+  RandGen r(0x1234);
+
+  for (size_t i = kBaseStringCount; i < kStringCount; ++i) {
+    s[i] = hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), c[i - kBaseStringCount]));
+    int32_t length = s[i]->GetLength();
+    if (length > 1) {
+      // Set a random offset and length.
+      int32_t new_offset = 1 + (r.next() % (length - 1));
+      int32_t rest = length - new_offset - 1;
+      int32_t new_length = 1 + (rest > 0 ? r.next() % rest : 0);
+
+      s[i]->SetField32<false>(mirror::String::CountOffset(), new_length);
+      s[i]->SetField32<false>(mirror::String::OffsetOffset(), new_offset);
+    }
   }
 
   // TODO: wide characters
 
   // Matrix of expectations. First component is first parameter. Note we only check against the
-  // sign, not the value.
-  int32_t expected[string_count][string_count] = {
-      {  0,  0, -1, -1, -1, -1, -1 },  // ""
-      {  0,  0, -1, -1, -1, -1, -1 },  // ""
-      {  1,  1,  0, -1, -1, -1, -1 },  // "a"
-      {  1,  1,  1,  0, -1, -1, -1 },  // "aa"
-      {  1,  1,  1,  1,  0,  1,  1 },  // "ab"
-      {  1,  1,  1,  1, -1,  0,  0 },  // "aac"
-      {  1,  1,  1,  1, -1,  0,  0 }   // "aac"
-  //    ""  ""   a  aa  ab  aac aac
-  };
+  // sign, not the value. As we are testing random offsets, we compute the expected values here
+  // and rely on String::CompareTo being correct.
+  int32_t expected[kStringCount][kStringCount];
+  for (size_t x = 0; x < kStringCount; ++x) {
+    for (size_t y = 0; y < kStringCount; ++y) {
+      expected[x][y] = s[x]->CompareTo(s[y].Get());
+    }
+  }
 
   // Play with it...
 
-  for (size_t x = 0; x < string_count; ++x) {
-    for (size_t y = 0; y < string_count; ++y) {
+  for (size_t x = 0; x < kStringCount; ++x) {
+    for (size_t y = 0; y < kStringCount; ++y) {
       // Test string_compareto x y
-      size_t result = Invoke3(reinterpret_cast<size_t>(s[x]->get()),
-                              reinterpret_cast<size_t>(s[y]->get()), 0U,
+      size_t result = Invoke3(reinterpret_cast<size_t>(s[x].Get()),
+                              reinterpret_cast<size_t>(s[y].Get()), 0U,
                               reinterpret_cast<uintptr_t>(&art_quick_string_compareto), self);
 
       EXPECT_FALSE(self->IsExceptionPending());
@@ -693,12 +1279,17 @@
       } conv;
       conv.r = result;
       int32_t e = expected[x][y];
-      EXPECT_TRUE(e == 0 ? conv.i == 0 : true) << "x=" << c[x] << " y=" << c[y];
-      EXPECT_TRUE(e < 0 ? conv.i < 0 : true)   << "x=" << c[x] << " y="  << c[y];
-      EXPECT_TRUE(e > 0 ? conv.i > 0 : true)   << "x=" << c[x] << " y=" << c[y];
+      EXPECT_TRUE(e == 0 ? conv.i == 0 : true) << "x=" << c[x] << " y=" << c[y] << " res=" <<
+          conv.r;
+      EXPECT_TRUE(e < 0 ? conv.i < 0 : true)   << "x=" << c[x] << " y="  << c[y] << " res=" <<
+          conv.r;
+      EXPECT_TRUE(e > 0 ? conv.i > 0 : true)   << "x=" << c[x] << " y=" << c[y] << " res=" <<
+          conv.r;
     }
   }
 
+  // TODO: Deallocate things.
+
   // Tests done.
 #else
   LOG(INFO) << "Skipping string_compareto as I don't know how to do that on " << kRuntimeISA;
@@ -708,4 +1299,496 @@
 #endif
 }
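The second half of the string array above exercises the compareto stub against strings whose data does not start at offset 0. The effect, in short (field setters as used in the test):

    // Turn a fresh copy of "aac" into the logical substring "ac":
    s->SetField32<false>(mirror::String::CountOffset(), 2);   // length 2
    s->SetField32<false>(mirror::String::OffsetOffset(), 1);  // start one char into the backing array
    // String::CompareTo() honours offset/count, so it supplies the expected sign for each pair.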
 
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_set32_static(void);
+extern "C" void art_quick_get32_static(void);
+#endif
+
+static void GetSet32Static(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f, Thread* self,
+                           mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  constexpr size_t num_values = 7;
+  uint32_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
+
+  for (size_t i = 0; i < num_values; ++i) {
+    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                              static_cast<size_t>(values[i]),
+                              0U,
+                              reinterpret_cast<uintptr_t>(&art_quick_set32_static),
+                              self,
+                              referrer);
+
+    size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                           0U, 0U,
+                                           reinterpret_cast<uintptr_t>(&art_quick_get32_static),
+                                           self,
+                                           referrer);
+
+    EXPECT_EQ(res, values[i]) << "Iteration " << i;
+  }
+#else
+  LOG(INFO) << "Skipping set32static as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping set32static as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_set32_instance(void);
+extern "C" void art_quick_get32_instance(void);
+#endif
+
+static void GetSet32Instance(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f,
+                             Thread* self, mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  constexpr size_t num_values = 7;
+  uint32_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
+
+  for (size_t i = 0; i < num_values; ++i) {
+    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                              reinterpret_cast<size_t>(obj->Get()),
+                              static_cast<size_t>(values[i]),
+                              reinterpret_cast<uintptr_t>(&art_quick_set32_instance),
+                              self,
+                              referrer);
+
+    int32_t res = f->Get()->GetInt(obj->Get());
+    EXPECT_EQ(res, static_cast<int32_t>(values[i])) << "Iteration " << i;
+
+    res++;
+    f->Get()->SetInt<false>(obj->Get(), res);
+
+    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                            reinterpret_cast<size_t>(obj->Get()),
+                                            0U,
+                                            reinterpret_cast<uintptr_t>(&art_quick_get32_instance),
+                                            self,
+                                            referrer);
+    EXPECT_EQ(res, static_cast<int32_t>(res2));
+  }
+#else
+  LOG(INFO) << "Skipping set32instance as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping set32instance as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_set_obj_static(void);
+extern "C" void art_quick_get_obj_static(void);
+
+static void set_and_check_static(uint32_t f_idx, mirror::Object* val, Thread* self,
+                                 mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  test->Invoke3WithReferrer(static_cast<size_t>(f_idx),
+                            reinterpret_cast<size_t>(val),
+                            0U,
+                            reinterpret_cast<uintptr_t>(&art_quick_set_obj_static),
+                            self,
+                            referrer);
+
+  size_t res = test->Invoke3WithReferrer(static_cast<size_t>(f_idx),
+                                         0U, 0U,
+                                         reinterpret_cast<uintptr_t>(&art_quick_get_obj_static),
+                                         self,
+                                         referrer);
+
+  EXPECT_EQ(res, reinterpret_cast<size_t>(val)) << "Value " << val;
+}
+#endif
+
+static void GetSetObjStatic(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f, Thread* self,
+                            mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  set_and_check_static((*f)->GetDexFieldIndex(), nullptr, self, referrer, test);
+
+  // Allocate a string object for simplicity.
+  mirror::String* str = mirror::String::AllocFromModifiedUtf8(self, "Test");
+  set_and_check_static((*f)->GetDexFieldIndex(), str, self, referrer, test);
+
+  set_and_check_static((*f)->GetDexFieldIndex(), nullptr, self, referrer, test);
+#else
+  LOG(INFO) << "Skipping setObjstatic as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping setObjstatic as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_set_obj_instance(void);
+extern "C" void art_quick_get_obj_instance(void);
+
+static void set_and_check_instance(Handle<mirror::ArtField>* f, mirror::Object* trg,
+                                   mirror::Object* val, Thread* self, mirror::ArtMethod* referrer,
+                                   StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                            reinterpret_cast<size_t>(trg),
+                            reinterpret_cast<size_t>(val),
+                            reinterpret_cast<uintptr_t>(&art_quick_set_obj_instance),
+                            self,
+                            referrer);
+
+  size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                         reinterpret_cast<size_t>(trg),
+                                         0U,
+                                         reinterpret_cast<uintptr_t>(&art_quick_get_obj_instance),
+                                         self,
+                                         referrer);
+
+  EXPECT_EQ(res, reinterpret_cast<size_t>(val)) << "Value " << val;
+
+  EXPECT_EQ(val, f->Get()->GetObj(trg));
+}
+#endif
+
+static void GetSetObjInstance(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f,
+                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  set_and_check_instance(f, obj->Get(), nullptr, self, referrer, test);
+
+  // Allocate a string object for simplicity.
+  mirror::String* str = mirror::String::AllocFromModifiedUtf8(self, "Test");
+  set_and_check_instance(f, obj->Get(), str, self, referrer, test);
+
+  set_and_check_instance(f, obj->Get(), nullptr, self, referrer, test);
+#else
+  LOG(INFO) << "Skipping setObjinstance as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping setObjinstance as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+// TODO: Complete these tests for 32b architectures.
+
+#if defined(__x86_64__) || defined(__aarch64__)
+extern "C" void art_quick_set64_static(void);
+extern "C" void art_quick_get64_static(void);
+#endif
+
+static void GetSet64Static(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f, Thread* self,
+                           mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__x86_64__) || defined(__aarch64__)
+  constexpr size_t num_values = 8;
+  uint64_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
+
+  for (size_t i = 0; i < num_values; ++i) {
+    test->Invoke3UWithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                               values[i],
+                               reinterpret_cast<uintptr_t>(&art_quick_set64_static),
+                               self,
+                               referrer);
+
+    size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                           0U, 0U,
+                                           reinterpret_cast<uintptr_t>(&art_quick_get64_static),
+                                           self,
+                                           referrer);
+
+    EXPECT_EQ(res, values[i]) << "Iteration " << i;
+  }
+#else
+  LOG(INFO) << "Skipping set64static as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping set64static as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+#if defined(__x86_64__) || defined(__aarch64__)
+extern "C" void art_quick_set64_instance(void);
+extern "C" void art_quick_get64_instance(void);
+#endif
+
+static void GetSet64Instance(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f,
+                             Thread* self, mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__x86_64__) || defined(__aarch64__)
+  constexpr size_t num_values = 8;
+  uint64_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
+
+  for (size_t i = 0; i < num_values; ++i) {
+    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                              reinterpret_cast<size_t>(obj->Get()),
+                              static_cast<size_t>(values[i]),
+                              reinterpret_cast<uintptr_t>(&art_quick_set64_instance),
+                              self,
+                              referrer);
+
+    int64_t res = f->Get()->GetLong(obj->Get());
+    EXPECT_EQ(res, static_cast<int64_t>(values[i])) << "Iteration " << i;
+
+    res++;
+    f->Get()->SetLong<false>(obj->Get(), res);
+
+    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                            reinterpret_cast<size_t>(obj->Get()),
+                                            0U,
+                                            reinterpret_cast<uintptr_t>(&art_quick_get64_instance),
+                                            self,
+                                            referrer);
+    EXPECT_EQ(res, static_cast<int64_t>(res2));
+  }
+#else
+  LOG(INFO) << "Skipping set64instance as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping set64instance as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+static void TestFields(Thread* self, StubTest* test, Primitive::Type test_type) {
+  // garbage is created during ClassLinker::Init
+
+  JNIEnv* env = Thread::Current()->GetJniEnv();
+  jclass jc = env->FindClass("AllFields");
+  CHECK(jc != NULL);
+  jobject o = env->AllocObject(jc);
+  CHECK(o != NULL);
+
+  ScopedObjectAccess soa(self);
+  StackHandleScope<5> hs(self);
+  Handle<mirror::Object> obj(hs.NewHandle(soa.Decode<mirror::Object*>(o)));
+  Handle<mirror::Class> c(hs.NewHandle(obj->GetClass()));
+  // Need a method as a referrer
+  Handle<mirror::ArtMethod> m(hs.NewHandle(c->GetDirectMethod(0)));
+
+  // Play with it...
+
+  // Static fields.
+  {
+    Handle<mirror::ObjectArray<mirror::ArtField>> fields(hs.NewHandle(c.Get()->GetSFields()));
+    int32_t num_fields = fields->GetLength();
+    for (int32_t i = 0; i < num_fields; ++i) {
+      StackHandleScope<1> hs(self);
+      Handle<mirror::ArtField> f(hs.NewHandle(fields->Get(i)));
+
+      FieldHelper fh(f.Get());
+      Primitive::Type type = fh.GetTypeAsPrimitiveType();
+      switch (type) {
+        case Primitive::Type::kPrimInt:
+          if (test_type == type) {
+            GetSet32Static(&obj, &f, self, m.Get(), test);
+          }
+          break;
+
+        case Primitive::Type::kPrimLong:
+          if (test_type == type) {
+            GetSet64Static(&obj, &f, self, m.Get(), test);
+          }
+          break;
+
+        case Primitive::Type::kPrimNot:
+          // Don't try array.
+          if (test_type == type && fh.GetTypeDescriptor()[0] != '[') {
+            GetSetObjStatic(&obj, &f, self, m.Get(), test);
+          }
+          break;
+
+        default:
+          break;  // Skip.
+      }
+    }
+  }
+
+  // Instance fields.
+  {
+    Handle<mirror::ObjectArray<mirror::ArtField>> fields(hs.NewHandle(c.Get()->GetIFields()));
+    int32_t num_fields = fields->GetLength();
+    for (int32_t i = 0; i < num_fields; ++i) {
+      StackHandleScope<1> hs(self);
+      Handle<mirror::ArtField> f(hs.NewHandle(fields->Get(i)));
+
+      FieldHelper fh(f.Get());
+      Primitive::Type type = fh.GetTypeAsPrimitiveType();
+      switch (type) {
+        case Primitive::Type::kPrimInt:
+          if (test_type == type) {
+            GetSet32Instance(&obj, &f, self, m.Get(), test);
+          }
+          break;
+
+        case Primitive::Type::kPrimLong:
+          if (test_type == type) {
+            GetSet64Instance(&obj, &f, self, m.Get(), test);
+          }
+          break;
+
+        case Primitive::Type::kPrimNot:
+          // Don't try array.
+          if (test_type == type && fh.GetTypeDescriptor()[0] != '[') {
+            GetSetObjInstance(&obj, &f, self, m.Get(), test);
+          }
+          break;
+
+        default:
+          break;  // Skip.
+      }
+    }
+  }
+
+  // TODO: Deallocate things.
+}
+
+
+TEST_F(StubTest, Fields32) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+
+  Thread* self = Thread::Current();
+
+  self->TransitionFromSuspendedToRunnable();
+  LoadDex("AllFields");
+  bool started = runtime_->Start();
+  CHECK(started);
+
+  TestFields(self, this, Primitive::Type::kPrimInt);
+}
+
+TEST_F(StubTest, FieldsObj) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+
+  Thread* self = Thread::Current();
+
+  self->TransitionFromSuspendedToRunnable();
+  LoadDex("AllFields");
+  bool started = runtime_->Start();
+  CHECK(started);
+
+  TestFields(self, this, Primitive::Type::kPrimNot);
+}
+
+TEST_F(StubTest, Fields64) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+
+  Thread* self = Thread::Current();
+
+  self->TransitionFromSuspendedToRunnable();
+  LoadDex("AllFields");
+  bool started = runtime_->Start();
+  CHECK(started);
+
+  TestFields(self, this, Primitive::Type::kPrimLong);
+}
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_imt_conflict_trampoline(void);
+#endif
+
+TEST_F(StubTest, IMT) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+
+  Thread* self = Thread::Current();
+
+  ScopedObjectAccess soa(self);
+  StackHandleScope<7> hs(self);
+
+  JNIEnv* env = Thread::Current()->GetJniEnv();
+
+  // ArrayList
+
+  // Load ArrayList and used methods (JNI).
+  jclass arraylist_jclass = env->FindClass("java/util/ArrayList");
+  ASSERT_NE(nullptr, arraylist_jclass);
+  jmethodID arraylist_constructor = env->GetMethodID(arraylist_jclass, "<init>", "()V");
+  ASSERT_NE(nullptr, arraylist_constructor);
+  jmethodID contains_jmethod = env->GetMethodID(arraylist_jclass, "contains", "(Ljava/lang/Object;)Z");
+  ASSERT_NE(nullptr, contains_jmethod);
+  jmethodID add_jmethod = env->GetMethodID(arraylist_jclass, "add", "(Ljava/lang/Object;)Z");
+  ASSERT_NE(nullptr, add_jmethod);
+
+  // Get mirror representation.
+  Handle<mirror::ArtMethod> contains_amethod(hs.NewHandle(soa.DecodeMethod(contains_jmethod)));
+
+  // Patch up ArrayList.contains.
+  if (contains_amethod.Get()->GetEntryPointFromQuickCompiledCode() == nullptr) {
+    contains_amethod.Get()->SetEntryPointFromQuickCompiledCode(reinterpret_cast<void*>(
+        GetTlsPtr(self)->quick_entrypoints.pQuickToInterpreterBridge));
+  }
+
+  // List
+
+  // Load List and used methods (JNI).
+  jclass list_jclass = env->FindClass("java/util/List");
+  ASSERT_NE(nullptr, list_jclass);
+  jmethodID inf_contains_jmethod = env->GetMethodID(list_jclass, "contains", "(Ljava/lang/Object;)Z");
+  ASSERT_NE(nullptr, inf_contains_jmethod);
+
+  // Get mirror representation.
+  Handle<mirror::ArtMethod> inf_contains(hs.NewHandle(soa.DecodeMethod(inf_contains_jmethod)));
+
+  // Object
+
+  jclass obj_jclass = env->FindClass("java/lang/Object");
+  ASSERT_NE(nullptr, obj_jclass);
+  jmethodID obj_constructor = env->GetMethodID(obj_jclass, "<init>", "()V");
+  ASSERT_NE(nullptr, obj_constructor);
+
+  // Sanity check: check that there is a conflict for List.contains in ArrayList.
+
+  mirror::Class* arraylist_class = soa.Decode<mirror::Class*>(arraylist_jclass);
+  mirror::ArtMethod* m = arraylist_class->GetImTable()->Get(
+      inf_contains->GetDexMethodIndex() % ClassLinker::kImtSize);
+
+  if (!m->IsImtConflictMethod()) {
+    LOG(WARNING) << "Test is meaningless, no IMT conflict in setup: " <<
+        PrettyMethod(m, true);
+    LOG(WARNING) << "Please update StubTest.IMT.";
+    return;
+  }
+
+  // Create instances.
+
+  jobject jarray_list = env->NewObject(arraylist_jclass, arraylist_constructor);
+  ASSERT_NE(nullptr, jarray_list);
+  Handle<mirror::Object> array_list(hs.NewHandle(soa.Decode<mirror::Object*>(jarray_list)));
+
+  jobject jobj = env->NewObject(obj_jclass, obj_constructor);
+  ASSERT_NE(nullptr, jobj);
+  Handle<mirror::Object> obj(hs.NewHandle(soa.Decode<mirror::Object*>(jobj)));
+
+  // Invoke.
+
+  size_t result =
+      Invoke3WithReferrerAndHidden(0U, reinterpret_cast<size_t>(array_list.Get()),
+                                   reinterpret_cast<size_t>(obj.Get()),
+                                   reinterpret_cast<uintptr_t>(&art_quick_imt_conflict_trampoline),
+                                   self, contains_amethod.Get(),
+                                   static_cast<size_t>(inf_contains.Get()->GetDexMethodIndex()));
+
+  ASSERT_FALSE(self->IsExceptionPending());
+  EXPECT_EQ(static_cast<size_t>(JNI_FALSE), result);
+
+  // Add object.
+
+  env->CallBooleanMethod(jarray_list, add_jmethod, jobj);
+
+  ASSERT_FALSE(self->IsExceptionPending()) << PrettyTypeOf(self->GetException(nullptr));
+
+  // Invoke again.
+
+  result = Invoke3WithReferrerAndHidden(0U, reinterpret_cast<size_t>(array_list.Get()),
+                                        reinterpret_cast<size_t>(obj.Get()),
+                                        reinterpret_cast<uintptr_t>(&art_quick_imt_conflict_trampoline),
+                                        self, contains_amethod.Get(),
+                                        static_cast<size_t>(inf_contains.Get()->GetDexMethodIndex()));
+
+  ASSERT_FALSE(self->IsExceptionPending());
+  EXPECT_EQ(static_cast<size_t>(JNI_TRUE), result);
+#else
+  LOG(INFO) << "Skipping memcpy as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping memcpy as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
 }  // namespace art
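
(For orientation, a condensed sketch of the referrer-based stub convention the new field tests exercise; it restates the pattern from GetSet32Static/GetSet32Instance above and is illustrative only, not code from this patch.)

    // Sketch only: assumes the StubTest fixture and the quick stubs declared above.
    static void SketchSet32(StubTest* test, uint32_t field_idx, uint32_t value,
                            mirror::Object* obj, Thread* self,
                            mirror::ArtMethod* referrer)
        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
      // Static set: arg0 = dex field index, arg1 = new value, arg2 unused; the
      // referrer ArtMethod* lets the runtime resolve the field index.
      test->Invoke3WithReferrer(field_idx, value, 0U,
                                reinterpret_cast<uintptr_t>(&art_quick_set32_static),
                                self, referrer);
      // Instance set: the target object moves into arg1 and the value into arg2.
      test->Invoke3WithReferrer(field_idx, reinterpret_cast<size_t>(obj), value,
                                reinterpret_cast<uintptr_t>(&art_quick_set32_instance),
                                self, referrer);
    }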
diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S
index d7c88ba..f1d0746 100644
--- a/runtime/arch/x86/asm_support_x86.S
+++ b/runtime/arch/x86/asm_support_x86.S
@@ -19,7 +19,7 @@
 
 #include "asm_support_x86.h"
 
-#if defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5)
+#if defined(__APPLE__) || (defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5))
     // Clang's as(1) doesn't let you name macro parameters prior to 3.5.
     #define MACRO0(macro_name) .macro macro_name
     #define MACRO1(macro_name, macro_arg1) .macro macro_name
@@ -32,8 +32,6 @@
     #define PLT_VAR(name, index) SYMBOL($index)
     #define REG_VAR(name,index) %$index
     #define CALL_MACRO(name,index) $index
-    #define FUNCTION_TYPE(name,index) .type $index, @function
-    #define SIZE(name,index) .size $index, .-$index
 
     //  The use of $x for arguments mean that literals need to be represented with $$x in macros.
     #define LITERAL(value) $value
@@ -56,13 +54,22 @@
     #define PLT_VAR(name, index) name&@PLT
     #define REG_VAR(name,index) %name
     #define CALL_MACRO(name,index) name&
-    #define FUNCTION_TYPE(name,index) .type name&, @function
-    #define SIZE(name,index) .size name, .-name
 
     #define LITERAL(value) $value
     #define MACRO_LITERAL(value) $value
 #endif
 
+#if defined(__APPLE__)
+    #define FUNCTION_TYPE(name,index)
+    #define SIZE(name,index)
+#elif defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5)
+    #define FUNCTION_TYPE(name,index) .type $index, @function
+    #define SIZE(name,index) .size $index, .-$index
+#else
+    #define FUNCTION_TYPE(name,index) .type name&, @function
+    #define SIZE(name,index) .size name, .-name
+#endif
+
     // CFI support.
 #if !defined(__APPLE__)
     #define CFI_STARTPROC .cfi_startproc
@@ -86,7 +93,13 @@
     // Symbols.
 #if !defined(__APPLE__)
     #define SYMBOL(name) name
-    #define PLT_SYMBOL(name) name  // ## @PLT  // TODO: Disabled for old clang 3.3
+    #if defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5)
+        // TODO: Disabled for old clang 3.3; this leads to text relocations and there should be a
+        // better fix.
+        #define PLT_SYMBOL(name) name // ## @PLT
+    #else
+        #define PLT_SYMBOL(name) name ## @PLT
+    #endif
 #else
     // Mac OS' symbols have an _ prefix.
     #define SYMBOL(name) _ ## name
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index c68d76a..8c98d91 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -16,8 +16,9 @@
 
 #include "context_x86.h"
 
-#include "mirror/art_method.h"
+#include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
+#include "quick/quick_method_frame_info.h"
 #include "stack.h"
 
 namespace art {
@@ -37,16 +38,15 @@
 
 void X86Context::FillCalleeSaves(const StackVisitor& fr) {
   mirror::ArtMethod* method = fr.GetMethod();
-  uint32_t core_spills = method->GetCoreSpillMask();
-  size_t spill_count = POPCOUNT(core_spills);
-  DCHECK_EQ(method->GetFpSpillMask(), 0u);
-  size_t frame_size = method->GetFrameSizeInBytes();
+  const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
+  size_t spill_count = POPCOUNT(frame_info.CoreSpillMask());
+  DCHECK_EQ(frame_info.FpSpillMask(), 0u);
   if (spill_count > 0) {
     // Lowest number spill is farthest away, walk registers and fill into context.
     int j = 2;  // Offset j to skip return address spill.
     for (int i = 0; i < kNumberOfCpuRegisters; i++) {
-      if (((core_spills >> i) & 1) != 0) {
-        gprs_[i] = fr.CalleeSaveAddress(spill_count - j, frame_size);
+      if (((frame_info.CoreSpillMask() >> i) & 1) != 0) {
+        gprs_[i] = fr.CalleeSaveAddress(spill_count - j, frame_info.FrameSizeInBytes());
         j++;
       }
     }
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index c4a7b1b..8ad29dd 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -71,11 +71,8 @@
 // Math entrypoints.
 extern "C" double art_quick_fmod(double, double);
 extern "C" float art_quick_fmodf(float, float);
-extern "C" double art_quick_l2d(int64_t);
-extern "C" float art_quick_l2f(int64_t);
 extern "C" int64_t art_quick_d2l(double);
 extern "C" int64_t art_quick_f2l(float);
-extern "C" int32_t art_quick_idivmod(int32_t, int32_t);
 extern "C" int64_t art_quick_ldiv(int64_t, int64_t);
 extern "C" int64_t art_quick_lmod(int64_t, int64_t);
 extern "C" int64_t art_quick_lmul(int64_t, int64_t);
@@ -181,12 +178,12 @@
   // points->pCmplFloat = NULL;  // Not needed on x86.
   qpoints->pFmod = art_quick_fmod;
   // qpoints->pSqrt = NULL;  // Not needed on x86.
-  qpoints->pL2d = art_quick_l2d;
+  // qpoints->pL2d = NULL;  // Not needed on x86.
   qpoints->pFmodf = art_quick_fmodf;
-  qpoints->pL2f = art_quick_l2f;
+  // qpoints->pL2f = NULL;  // Not needed on x86.
   // points->pD2iz = NULL;  // Not needed on x86.
   // points->pF2iz = NULL;  // Not needed on x86.
-  qpoints->pIdivmod = art_quick_idivmod;
+  // qpoints->pIdivmod = NULL;  // Not needed on x86.
   qpoints->pD2l = art_quick_d2l;
   qpoints->pF2l = art_quick_f2l;
   qpoints->pLdiv = art_quick_ldiv;
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 339ed2e..b311ea5 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -763,28 +763,6 @@
     ret
 END_FUNCTION art_quick_fmodf
 
-DEFINE_FUNCTION art_quick_l2d
-    PUSH ecx                      // push arg2 a.hi
-    PUSH eax                      // push arg1 a.lo
-    fildll (%esp)                 // load as integer and push into st0
-    fstpl (%esp)                  // pop value off fp stack as double
-    movsd (%esp), %xmm0           // place into %xmm0
-    addl LITERAL(8), %esp         // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-8)
-    ret
-END_FUNCTION art_quick_l2d
-
-DEFINE_FUNCTION art_quick_l2f
-    PUSH ecx                      // push arg2 a.hi
-    PUSH eax                      // push arg1 a.lo
-    fildll (%esp)                 // load as integer and push into st0
-    fstps (%esp)                  // pop value off fp stack as a single
-    movss (%esp), %xmm0           // place into %xmm0
-    addl LITERAL(8), %esp         // pop argument
-    CFI_ADJUST_CFA_OFFSET(-8)
-    ret
-END_FUNCTION art_quick_l2f
-
 DEFINE_FUNCTION art_quick_d2l
     PUSH eax                      // alignment padding
     PUSH ecx                      // pass arg2 a.hi
@@ -807,20 +785,6 @@
     ret
 END_FUNCTION art_quick_f2l
 
-DEFINE_FUNCTION art_quick_idivmod
-    cmpl LITERAL(0x80000000), %eax
-    je .Lcheck_arg2  // special case
-.Largs_ok:
-    cdq         // edx:eax = sign extend eax
-    idiv %ecx   // (edx,eax) = (edx:eax % ecx, edx:eax / ecx)
-    ret
-.Lcheck_arg2:
-    cmpl LITERAL(-1), %ecx
-    jne .Largs_ok
-    xorl %edx, %edx
-    ret         // eax already holds min int
-END_FUNCTION art_quick_idivmod
-
 DEFINE_FUNCTION art_quick_ldiv
     subl LITERAL(12), %esp       // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
diff --git a/runtime/arch/x86/quick_method_frame_info_x86.h b/runtime/arch/x86/quick_method_frame_info_x86.h
new file mode 100644
index 0000000..b9dc0d8
--- /dev/null
+++ b/runtime/arch/x86/quick_method_frame_info_x86.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_X86_QUICK_METHOD_FRAME_INFO_X86_H_
+#define ART_RUNTIME_ARCH_X86_QUICK_METHOD_FRAME_INFO_X86_H_
+
+#include "quick/quick_method_frame_info.h"
+#include "registers_x86.h"
+#include "runtime.h"  // for Runtime::CalleeSaveType.
+
+namespace art {
+namespace x86 {
+
+static constexpr uint32_t kX86CalleeSaveRefSpills =
+    (1 << art::x86::EBP) | (1 << art::x86::ESI) | (1 << art::x86::EDI);
+static constexpr uint32_t kX86CalleeSaveArgSpills =
+    (1 << art::x86::ECX) | (1 << art::x86::EDX) | (1 << art::x86::EBX);
+
+constexpr uint32_t X86CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
+  return kX86CalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? kX86CalleeSaveArgSpills : 0) |
+      (1 << art::x86::kNumberOfCpuRegisters);  // fake return address callee save
+}
+
+constexpr uint32_t X86CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
+  return RoundUp((POPCOUNT(X86CalleeSaveCoreSpills(type)) /* gprs */ +
+                  1 /* Method* */) * kX86PointerSize, kStackAlignment);
+}
+
+constexpr QuickMethodFrameInfo X86CalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
+  return QuickMethodFrameInfo(X86CalleeSaveFrameSize(type),
+                              X86CalleeSaveCoreSpills(type),
+                              0u);
+}
+
+}  // namespace x86
+}  // namespace art
+
+#endif  // ART_RUNTIME_ARCH_X86_QUICK_METHOD_FRAME_INFO_X86_H_
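
(As a concrete data point, and assuming the usual 4-byte kX86PointerSize and 16-byte kStackAlignment defined elsewhere in the tree, the constexpr helpers above evaluate as sketched below; illustrative only, not part of this patch.)

    #include "arch/x86/quick_method_frame_info_x86.h"
    // kRefsAndArgs spills EBP, ESI, EDI (refs), ECX, EDX, EBX (args) plus the
    // fake return-address bit: 7 GPR slots + 1 Method* slot = 8 * 4 = 32 bytes,
    // already a multiple of the 16-byte stack alignment.
    COMPILE_ASSERT(art::x86::X86CalleeSaveFrameSize(art::Runtime::kRefsAndArgs) == 32u,
                   x86_refs_and_args_frame_size_is_32_bytes);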
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index 26cd864..9f36927 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -40,10 +40,9 @@
 
 namespace art {
 
-static Mutex modify_ldt_lock("modify_ldt lock");
-
 void Thread::InitCpu() {
-  MutexLock mu(Thread::Current(), modify_ldt_lock);
+  // Take the ldt lock, Thread::Current isn't yet established.
+  MutexLock mu(nullptr, *Locks::modify_ldt_lock_);
 
   const uintptr_t base = reinterpret_cast<uintptr_t>(this);
   const size_t limit = kPageSize;
@@ -138,7 +137,7 @@
 }
 
 void Thread::CleanupCpu() {
-  MutexLock mu(Thread::Current(), modify_ldt_lock);
+  MutexLock mu(this, *Locks::modify_ldt_lock_);
 
   // Sanity check that reads from %fs point to this Thread*.
   Thread* self_check;
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index 29a7065..810ef94 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -16,8 +16,9 @@
 
 #include "context_x86_64.h"
 
-#include "mirror/art_method.h"
+#include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
+#include "quick/quick_method_frame_info.h"
 #include "stack.h"
 
 namespace art {
@@ -40,17 +41,15 @@
 
 void X86_64Context::FillCalleeSaves(const StackVisitor& fr) {
   mirror::ArtMethod* method = fr.GetMethod();
-  uint32_t core_spills = method->GetCoreSpillMask();
-  uint32_t fp_core_spills = method->GetFpSpillMask();
-  size_t spill_count = POPCOUNT(core_spills);
-  size_t fp_spill_count = POPCOUNT(fp_core_spills);
-  size_t frame_size = method->GetFrameSizeInBytes();
+  const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
+  size_t spill_count = POPCOUNT(frame_info.CoreSpillMask());
+  size_t fp_spill_count = POPCOUNT(frame_info.FpSpillMask());
   if (spill_count > 0) {
     // Lowest number spill is farthest away, walk registers and fill into context.
     size_t j = 2;  // Offset j to skip return address spill.
     for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) {
-      if (((core_spills >> i) & 1) != 0) {
-        gprs_[i] = fr.CalleeSaveAddress(spill_count - j, frame_size);
+      if (((frame_info.CoreSpillMask() >> i) & 1) != 0) {
+        gprs_[i] = fr.CalleeSaveAddress(spill_count - j, frame_info.FrameSizeInBytes());
         j++;
       }
     }
@@ -59,8 +58,9 @@
     // Lowest number spill is farthest away, walk registers and fill into context.
     size_t j = 2;  // Offset j to skip return address spill.
     for (size_t i = 0; i < kNumberOfFloatRegisters; ++i) {
-      if (((fp_core_spills >> i) & 1) != 0) {
-        fprs_[i] = fr.CalleeSaveAddress(spill_count + fp_spill_count - j, frame_size);
+      if (((frame_info.FpSpillMask() >> i) & 1) != 0) {
+        fprs_[i] = fr.CalleeSaveAddress(spill_count + fp_spill_count - j,
+                                        frame_info.FrameSizeInBytes());
         j++;
       }
     }
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 30067cf..86dcf36 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -18,6 +18,7 @@
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/math_entrypoints.h"
 
 namespace art {
 
@@ -34,8 +35,8 @@
 extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*);
 
 // Cast entrypoints.
-extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass,
-                                                const mirror::Class* ref_class);
+extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
+                                            const mirror::Class* ref_class);
 extern "C" void art_quick_check_cast(void*, void*);
 
 // DexCache entrypoints.
@@ -69,13 +70,8 @@
 extern "C" void art_quick_unlock_object(void*);
 
 // Math entrypoints.
-extern "C" double art_quick_fmod(double, double);
-extern "C" float art_quick_fmodf(float, float);
-extern "C" double art_quick_l2d(int64_t);
-extern "C" float art_quick_l2f(int64_t);
 extern "C" int64_t art_quick_d2l(double);
 extern "C" int64_t art_quick_f2l(float);
-extern "C" int32_t art_quick_idivmod(int32_t, int32_t);
 extern "C" int64_t art_quick_ldiv(int64_t, int64_t);
 extern "C" int64_t art_quick_lmod(int64_t, int64_t);
 extern "C" int64_t art_quick_lmul(int64_t, int64_t);
@@ -85,7 +81,6 @@
 
 // Intrinsic entrypoints.
 extern "C" int32_t art_quick_memcmp16(void*, void*, int32_t);
-extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
 extern "C" int32_t art_quick_string_compareto(void*, void*);
 extern "C" void* art_quick_memcpy(void*, const void*, size_t);
 
@@ -133,7 +128,7 @@
   ResetQuickAllocEntryPoints(qpoints);
 
   // Cast
-  qpoints->pInstanceofNonTrivial = art_quick_is_assignable;
+  qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
   qpoints->pCheckCast = art_quick_check_cast;
 
   // DexCache
@@ -180,16 +175,16 @@
   // points->pCmpgFloat = NULL;  // Not needed on x86.
   // points->pCmplDouble = NULL;  // Not needed on x86.
   // points->pCmplFloat = NULL;  // Not needed on x86.
-  qpoints->pFmod = art_quick_fmod;
+  qpoints->pFmod = fmod;
   // qpoints->pSqrt = NULL;  // Not needed on x86.
-  qpoints->pL2d = art_quick_l2d;
-  qpoints->pFmodf = art_quick_fmodf;
-  qpoints->pL2f = art_quick_l2f;
+  // qpoints->pL2d = NULL;  // Not needed on x86.
+  qpoints->pFmodf = fmodf;
+  // qpoints->pL2f = NULL;  // Not needed on x86.
   // points->pD2iz = NULL;  // Not needed on x86.
   // points->pF2iz = NULL;  // Not needed on x86.
-  qpoints->pIdivmod = art_quick_idivmod;
-  qpoints->pD2l = art_quick_d2l;
-  qpoints->pF2l = art_quick_f2l;
+  // qpoints->pIdivmod = NULL;  // Not needed on x86.
+  qpoints->pD2l = art_d2l;
+  qpoints->pF2l = art_f2l;
   qpoints->pLdiv = art_quick_ldiv;
   qpoints->pLmod = art_quick_lmod;
   qpoints->pLmul = art_quick_lmul;
@@ -198,7 +193,7 @@
   qpoints->pUshrLong = art_quick_lushr;
 
   // Intrinsics
-  qpoints->pIndexOf = art_quick_indexof;
+  // qpoints->pIndexOf = NULL;  // Not needed on x86.
   qpoints->pMemcmp16 = art_quick_memcmp16;
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = art_quick_memcpy;
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 9ccf6c9..971688d 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -239,24 +239,45 @@
 
     /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
-     * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
+     * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
      * the method_idx.  This wrapper will save arg1-arg3, load the caller's Method*, align the
      * stack and call the appropriate C helper.
-     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/r1.
+     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/rsi.
      *
-     * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting
-     * of the target Method* in r0 and method->code_ in r1.
+     * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
+     * of the target Method* in rax and method->code_ in rdx.
      *
-     * If unsuccessful, the helper will return NULL/NULL. There will bea pending exception in the
+     * If unsuccessful, the helper will return NULL/????. There will be a pending exception in the
      * thread and we branch to another stub to deliver it.
      *
-     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
-     * pointing back to the original caller.
+     * On success this wrapper will restore arguments and *jump* to the target, leaving the return
+     * location on the stack.
+     *
+     * Adapted from x86 code.
      */
 MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name, 0)
-    int3
-    int3
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
+    // Helper signature is always
+    // (method_idx, *this_object, *caller_method, *self, sp)
+
+    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE(%rsp), %rdx  // pass caller Method*
+    movq %gs:THREAD_SELF_OFFSET, %rcx                      // pass Thread
+    movq %rsp, %r8                                         // pass SP
+
+    call PLT_VAR(cxx_name, 1)                   // cxx_name(arg1, arg2, caller method*, Thread*, SP)
+                                                           // save the code pointer
+    movq %rax, %rdi
+    movq %rdx, %rax
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+
+    testq %rdi, %rdi
+    jz 1f
+
+    // Tail call to intended method.
+    jmp *%rax
+1:
+    DELIVER_PENDING_EXCEPTION
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
 
@@ -476,7 +497,6 @@
 
 MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
-    UNTESTED
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
     // Outgoing argument set up
     movq %rsp, %rsi                   // pass SP
@@ -489,7 +509,6 @@
 
 MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
-    UNTESTED
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
     // Outgoing argument set up
     movq %rsp, %rdx                    // pass SP
@@ -697,8 +716,8 @@
     jz   .Lslow_unlock
     movl LOCK_WORD_OFFSET(%edi), %ecx     // ecx := lock word
     movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
-    test %ecx, %ecx
-    jb   .Lslow_unlock                    // lock word contains a monitor
+    test LITERAL(0xC0000000), %ecx
+    jnz  .Lslow_unlock                    // lock word contains a monitor
     cmpw %cx, %dx                         // does the thread id match?
     jne  .Lslow_unlock
     cmpl LITERAL(65536), %ecx
@@ -718,11 +737,6 @@
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object
 
-DEFINE_FUNCTION art_quick_is_assignable
-    int3
-    int3
-END_FUNCTION art_quick_is_assignable
-
 DEFINE_FUNCTION art_quick_check_cast
     PUSH rdi                          // Save args for exc
     PUSH rsi
@@ -857,31 +871,82 @@
 
 NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
 
-UNIMPLEMENTED art_quick_fmod
-UNIMPLEMENTED art_quick_fmodf
-UNIMPLEMENTED art_quick_l2d
-UNIMPLEMENTED art_quick_l2f
-UNIMPLEMENTED art_quick_d2l
-UNIMPLEMENTED art_quick_f2l
-UNIMPLEMENTED art_quick_idivmod
 UNIMPLEMENTED art_quick_ldiv
 UNIMPLEMENTED art_quick_lmod
 UNIMPLEMENTED art_quick_lmul
 UNIMPLEMENTED art_quick_lshl
 UNIMPLEMENTED art_quick_lshr
 UNIMPLEMENTED art_quick_lushr
-UNIMPLEMENTED art_quick_set32_instance
-UNIMPLEMENTED art_quick_set64_instance
-UNIMPLEMENTED art_quick_set_obj_instance
-UNIMPLEMENTED art_quick_get32_instance
-UNIMPLEMENTED art_quick_get64_instance
-UNIMPLEMENTED art_quick_get_obj_instance
-UNIMPLEMENTED art_quick_set32_static
-UNIMPLEMENTED art_quick_set64_static
-UNIMPLEMENTED art_quick_set_obj_static
-UNIMPLEMENTED art_quick_get32_static
-UNIMPLEMENTED art_quick_get64_static
-UNIMPLEMENTED art_quick_get_obj_static
+
+
+MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    movq 8(%rsp), %rsi                 // pass referrer
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+                                       // arg0 is in rdi
+    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
+    movq %rsp, %rcx                    // pass SP
+    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, referrer, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    CALL_MACRO(return_macro, 2)
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    movq 8(%rsp), %rdx                 // pass referrer
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+                                       // arg0 and arg1 are in rdi/rsi
+    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
+    movq %rsp, %r8                     // pass SP
+    call PLT_VAR(cxx_name, 1)          // (arg0, arg1, referrer, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    CALL_MACRO(return_macro, 2)
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    movq 8(%rsp), %rcx                 // pass referrer
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+                                       // arg0, arg1, and arg2 are in rdi/rsi/rdx
+    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
+    movq %rsp, %r9                     // pass SP
+    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, arg1, arg2, referrer, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    CALL_MACRO(return_macro, 2)        // return or deliver exception
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+
+THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_EAX_ZERO
+THREE_ARG_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_EAX_ZERO
+THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_EAX_ZERO
+
+TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+
+TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_EAX_ZERO
+TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_EAX_ZERO
+
+ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+
+// This is singled out as the argument order is different.
+DEFINE_FUNCTION art_quick_set64_static
+    movq %rsi, %rdx                    // pass new_val
+    movq 8(%rsp), %rsi                 // pass referrer
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+                                       // field_idx is in rdi
+    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
+    movq %rsp, %r8                     // pass SP
+    call PLT_SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    RETURN_IF_EAX_ZERO                 // return or deliver exception
+END_FUNCTION art_quick_set64_static
+
 
 DEFINE_FUNCTION art_quick_proxy_invoke_handler
     // Save callee and GPR args, mixed together to agree with core spills bitmap of ref. and args
@@ -921,9 +986,18 @@
 END_FUNCTION art_quick_proxy_invoke_handler
 
     /*
-     * Called to resolve an imt conflict.
+     * Called to resolve an imt conflict. Clobbers %rax (which will be clobbered later anyways).
+     *
+     * xmm0 is a hidden argument that holds the target method's dex method index.
+     * TODO: With proper hard-float support, this needs to be kept in sync with the quick compiler.
      */
-UNIMPLEMENTED art_quick_imt_conflict_trampoline
+DEFINE_FUNCTION art_quick_imt_conflict_trampoline
+    movq 16(%rsp), %rdi            // load caller Method*
+    movl METHOD_DEX_CACHE_METHODS_OFFSET(%rdi), %edi  // load dex_cache_resolved_methods
+    movd %xmm0, %rax               // get target method index stored in xmm0
+    movl OBJECT_ARRAY_DATA_OFFSET(%rdi, %rax, 4), %edi  // load the target method
+    jmp art_quick_invoke_interface_trampoline_local
+END_FUNCTION art_quick_imt_conflict_trampoline
 
 DEFINE_FUNCTION art_quick_resolution_trampoline
     SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
@@ -937,7 +1011,6 @@
     jz 1f
     jmp *%r10                     // Tail call into method.
 1:
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
     DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_resolution_trampoline
 
@@ -995,12 +1068,12 @@
  * | Return            |
  * | Callee-Save Data  |
  * #-------------------#
- * | SIRT              |
+ * | handle scope      |
  * #-------------------#
  * | Method*           |    <--- (1)
  * #-------------------#
  * | local ref cookie  | // 4B
- * | SIRT size         | // 4B   TODO: roll into call stack alignment?
+ * | handle scope size | // 4B   TODO: roll into call stack alignment?
  * #-------------------#
  * | JNI Call Stack    |
  * #-------------------#    <--- SP on native call
@@ -1053,8 +1126,8 @@
     //
     //      4    local state ref
     //      4    padding
-    //   4196    4k scratch space, enough for 2x 256 8-byte parameters (TODO: SIRT overhead?)
-    //     16    SIRT member fields ?
+    //   4196    4k scratch space, enough for 2x 256 8-byte parameters (TODO: handle scope overhead?)
+    //     16    handle scope member fields ?
     // +  112    14x 8-byte stack-2-register space
     // ------
     //   4332
@@ -1159,7 +1232,7 @@
     movq %rbx, %rsp
     CFI_DEF_CFA_REGISTER(rsp)
 .Lexception_in_native:
-    // TODO: the SIRT contains the this pointer which is used by the debugger for exception
+    // TODO: the handle scope contains the this pointer which is used by the debugger for exception
     //       delivery.
     movq %xmm0, 16(%rsp)         // doesn't make sense!!!
     movq 24(%rsp), %xmm1            // neither does this!!!
@@ -1216,8 +1289,6 @@
      */
 UNIMPLEMENTED art_quick_deoptimize
 
-UNIMPLEMENTED art_quick_indexof
-
     /*
      * String's compareTo.
      *
diff --git a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
new file mode 100644
index 0000000..6183909
--- /dev/null
+++ b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_X86_64_QUICK_METHOD_FRAME_INFO_X86_64_H_
+#define ART_RUNTIME_ARCH_X86_64_QUICK_METHOD_FRAME_INFO_X86_64_H_
+
+#include "quick/quick_method_frame_info.h"
+#include "registers_x86_64.h"
+#include "runtime.h"  // for Runtime::CalleeSaveType.
+
+namespace art {
+namespace x86_64 {
+
+static constexpr uint32_t kX86_64CalleeSaveRefSpills =
+    (1 << art::x86_64::RBX) | (1 << art::x86_64::RBP) | (1 << art::x86_64::R12) |
+    (1 << art::x86_64::R13) | (1 << art::x86_64::R14) | (1 << art::x86_64::R15);
+static constexpr uint32_t kX86_64CalleeSaveArgSpills =
+    (1 << art::x86_64::RSI) | (1 << art::x86_64::RDX) | (1 << art::x86_64::RCX) |
+    (1 << art::x86_64::R8) | (1 << art::x86_64::R9);
+static constexpr uint32_t kX86_64CalleeSaveFpArgSpills =
+    (1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) | (1 << art::x86_64::XMM2) |
+    (1 << art::x86_64::XMM3) | (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) |
+    (1 << art::x86_64::XMM6) | (1 << art::x86_64::XMM7);
+
+constexpr uint32_t X86_64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
+  return kX86_64CalleeSaveRefSpills |
+      (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveArgSpills : 0) |
+      (1 << art::x86_64::kNumberOfCpuRegisters);  // fake return address callee save
+}
+
+constexpr uint32_t X86_64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
+  return (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0);
+}
+
+constexpr uint32_t X86_64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
+  return RoundUp((POPCOUNT(X86_64CalleeSaveCoreSpills(type)) /* gprs */ +
+                  POPCOUNT(X86_64CalleeSaveFpSpills(type)) /* fprs */ +
+                  1 /* Method* */) * kX86_64PointerSize, kStackAlignment);
+}
+
+constexpr QuickMethodFrameInfo X86_64CalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
+  return QuickMethodFrameInfo(X86_64CalleeSaveFrameSize(type),
+                              X86_64CalleeSaveCoreSpills(type),
+                              X86_64CalleeSaveFpSpills(type));
+}
+
+}  // namespace x86_64
+}  // namespace art
+
+#endif  // ART_RUNTIME_ARCH_X86_64_QUICK_METHOD_FRAME_INFO_X86_64_H_
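
(The same arithmetic for x86-64, assuming 8-byte kX86_64PointerSize and 16-byte kStackAlignment, works out as below; illustrative only, not part of this patch.)

    #include "arch/x86_64/quick_method_frame_info_x86_64.h"
    // 6 callee-save GPRs + 5 argument GPRs + the fake return-address bit give
    // 12 core spills; add 8 XMM argument spills and the Method* slot:
    // (12 + 8 + 1) * 8 = 168 bytes, rounded up to 176 for 16-byte alignment.
    COMPILE_ASSERT(
        art::x86_64::X86_64CalleeSaveFrameSize(art::Runtime::kRefsAndArgs) == 176u,
        x86_64_refs_and_args_frame_size_is_176_bytes);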
diff --git a/runtime/atomic.h b/runtime/atomic.h
index 6867fef..9262db6 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -17,7 +17,15 @@
 #ifndef ART_RUNTIME_ATOMIC_H_
 #define ART_RUNTIME_ATOMIC_H_
 
+#ifdef __clang__
+#define ART_HAVE_STDATOMIC 1
+#endif
+
 #include <stdint.h>
+#if ART_HAVE_STDATOMIC
+#include <atomic>
+#endif
+#include <limits>
 #include <vector>
 
 #include "base/logging.h"
@@ -27,6 +35,76 @@
 
 class Mutex;
 
+#if ART_HAVE_STDATOMIC
+template<typename T>
+class Atomic : public std::atomic<T> {
+ public:
+  COMPILE_ASSERT(sizeof(T) == sizeof(std::atomic<T>),
+                 std_atomic_size_differs_from_that_of_underlying_type);
+  COMPILE_ASSERT(alignof(T) == alignof(std::atomic<T>),
+                 std_atomic_alignment_differs_from_that_of_underlying_type);
+
+  Atomic<T>() : std::atomic<T>() { }
+
+  explicit Atomic<T>(T value) : std::atomic<T>(value) { }
+
+  // Load from memory without ordering or synchronization constraints.
+  T LoadRelaxed() const {
+    return this->load(std::memory_order_relaxed);
+  }
+
+  // Load from memory with a total ordering.
+  T LoadSequentiallyConsistent() const {
+    return this->load(std::memory_order_seq_cst);
+  }
+
+  // Store to memory without ordering or synchronization constraints.
+  void StoreRelaxed(T desired) {
+    this->store(desired, std::memory_order_relaxed);
+  }
+
+  // Store to memory with a total ordering.
+  void StoreSequentiallyConsistent(T desired) {
+    this->store(desired, std::memory_order_seq_cst);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Doesn't
+  // imply ordering or synchronization constraints.
+  bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
+    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_relaxed);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Prior writes
+  // made to other memory locations by the thread that did the release become visible in this
+  // thread.
+  bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
+    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_acquire);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Prior writes
+  // to other memory locations become visible to the threads that do a consume or an acquire on the
+  // same location.
+  bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
+    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_release);
+  }
+
+  T FetchAndAddSequentiallyConsistent(const T value) {
+    return this->fetch_add(value, std::memory_order_seq_cst);  // Return old_value.
+  }
+
+  T FetchAndSubSequentiallyConsistent(const T value) {
+    return this->fetch_sub(value, std::memory_order_seq_cst);  // Return old value.
+  }
+
+  volatile T* Address() {
+    return reinterpret_cast<T*>(this);
+  }
+
+  static T MaxValue() {
+    return std::numeric_limits<T>::max();
+  }
+};
+#else
 template<typename T>
 class Atomic {
  public:
@@ -34,24 +112,54 @@
 
   explicit Atomic<T>(T value) : value_(value) { }
 
-  Atomic<T>& operator=(T desired) {
-    Store(desired);
-    return *this;
-  }
-
-  T Load() const {
+  // Load from memory without ordering or synchronization constraints.
+  T LoadRelaxed() const {
     return value_;
   }
 
-  operator T() const {
-    return Load();
+  // Load from memory with a total ordering.
+  T LoadSequentiallyConsistent() const;
+
+  // Store to memory without ordering or synchronization constraints.
+  void StoreRelaxed(T desired) {
+    value_ = desired;
   }
 
-  T FetchAndAdd(const T value) {
+  // Store to memory with a total ordering.
+  void StoreSequentiallyConsistent(T desired);
+
+  // Atomically replace the value with desired value if it matches the expected value. Doesn't
+  // imply ordering or synchronization constraints.
+  bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
+    // TODO: make this relaxed.
+    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Prior writes
+  // made to other memory locations by the thread that did the release become visible in this
+  // thread.
+  bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
+    // TODO: make this acquire.
+    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Prior writes
+  // to other memory locations become visible to the threads that do a consume or an acquire on the
+  // same location.
+  bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
+    // TODO: make this release.
+    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
+  }
+
+  volatile T* Address() {
+    return &value_;
+  }
+
+  T FetchAndAddSequentiallyConsistent(const T value) {
     return __sync_fetch_and_add(&value_, value);  // Return old_value.
   }
 
-  T FetchAndSub(const T value) {
+  T FetchAndSubSequentiallyConsistent(const T value) {
     return __sync_fetch_and_sub(&value_, value);  // Return old value.
   }
 
@@ -71,22 +179,14 @@
     return __sync_fetch_and_sub(&value_, 1);  // Return old value.
   }
 
-  bool CompareAndSwap(T expected_value, T desired_value) {
-    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
-  }
-
-  volatile T* Address() {
-    return &value_;
+  static T MaxValue() {
+    return std::numeric_limits<T>::max();
   }
 
  private:
-  // Unsafe = operator for non atomic operations on the integer.
-  void Store(T desired) {
-    value_ = desired;
-  }
-
-  volatile T value_;
+  T value_;
 };
+#endif
 
 typedef Atomic<int32_t> AtomicInteger;
 
@@ -196,7 +296,7 @@
 
   // Does the architecture provide reasonable atomic long operations or do we fall back on mutexes?
   static bool LongAtomicsUseMutexes() {
-    return !kNeedSwapMutexes;
+    return kNeedSwapMutexes;
   }
 
   static void MembarLoadStore() {
@@ -260,6 +360,23 @@
   DISALLOW_COPY_AND_ASSIGN(QuasiAtomic);
 };
 
+#if !ART_HAVE_STDATOMIC
+template<typename T>
+inline T Atomic<T>::LoadSequentiallyConsistent() const {
+  T result = value_;
+  QuasiAtomic::MembarLoadLoad();
+  return result;
+}
+
+template<typename T>
+inline void Atomic<T>::StoreSequentiallyConsistent(T desired) {
+  QuasiAtomic::MembarStoreStore();
+  value_ = desired;
+  QuasiAtomic::MembarStoreLoad();
+}
+
+#endif
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ATOMIC_H_
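
(To make the renamed operations concrete, a short usage sketch of the memory-order-suffixed API defined above; the barrier_test.cc hunk below shows the LoadRelaxed form at real call sites. Illustrative only, assuming runtime/atomic.h is included.)

    void AtomicSketch() {
      art::Atomic<int32_t> counter(0);
      counter.FetchAndAddSequentiallyConsistent(1);     // add; returns the old value
      int32_t snapshot = counter.LoadRelaxed();         // read, no ordering constraints
      counter.StoreSequentiallyConsistent(snapshot);    // write with a total ordering
      counter.CompareExchangeWeakRelaxed(snapshot, 0);  // weak CAS, may fail spuriously
    }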
diff --git a/runtime/barrier.h b/runtime/barrier.h
index 0c7fd87..a433cac 100644
--- a/runtime/barrier.h
+++ b/runtime/barrier.h
@@ -17,8 +17,8 @@
 #ifndef ART_RUNTIME_BARRIER_H_
 #define ART_RUNTIME_BARRIER_H_
 
+#include <memory>
 #include "base/mutex.h"
-#include "UniquePtr.h"
 
 namespace art {
 
diff --git a/runtime/barrier_test.cc b/runtime/barrier_test.cc
index 7d32338..086ef44 100644
--- a/runtime/barrier_test.cc
+++ b/runtime/barrier_test.cc
@@ -22,7 +22,6 @@
 #include "common_runtime_test.h"
 #include "mirror/object_array-inl.h"
 #include "thread_pool.h"
-#include "UniquePtr.h"
 
 namespace art {
 class CheckWaitTask : public Task {
@@ -78,20 +77,20 @@
   barrier.Increment(self, num_threads);
   // At this point each thread should have passed through the barrier. The first count should be
   // equal to num_threads.
-  EXPECT_EQ(num_threads, count1);
+  EXPECT_EQ(num_threads, count1.LoadRelaxed());
   // Count 3 should still be zero since no thread should have gone past the second barrier.
-  EXPECT_EQ(0, count3);
+  EXPECT_EQ(0, count3.LoadRelaxed());
   // Now lets tell the threads to pass again.
   barrier.Increment(self, num_threads);
   // Count 2 should be equal to num_threads since each thread must have passed the second barrier
   // at this point.
-  EXPECT_EQ(num_threads, count2);
+  EXPECT_EQ(num_threads, count2.LoadRelaxed());
   // Wait for all the threads to finish.
   thread_pool.Wait(self, true, false);
   // All three counts should be equal to num_threads now.
-  EXPECT_EQ(count1, count2);
-  EXPECT_EQ(count2, count3);
-  EXPECT_EQ(num_threads, count3);
+  EXPECT_EQ(count1.LoadRelaxed(), count2.LoadRelaxed());
+  EXPECT_EQ(count2.LoadRelaxed(), count3.LoadRelaxed());
+  EXPECT_EQ(num_threads, count3.LoadRelaxed());
 }
 
 class CheckPassTask : public Task {
@@ -134,7 +133,7 @@
   // Wait for all the tasks to complete using the barrier.
   barrier.Increment(self, expected_total_tasks);
   // The total number of completed tasks should be equal to expected_total_tasks.
-  EXPECT_EQ(count, expected_total_tasks);
+  EXPECT_EQ(count.LoadRelaxed(), expected_total_tasks);
 }
 
 }  // namespace art
diff --git a/runtime/base/bit_vector.cc b/runtime/base/bit_vector.cc
index 3df5101..0053389 100644
--- a/runtime/base/bit_vector.cc
+++ b/runtime/base/bit_vector.cc
@@ -43,11 +43,13 @@
   : allocator_(allocator),
     expandable_(expandable),
     storage_size_(storage_size),
-    storage_(storage) {
-  DCHECK_EQ(sizeof(*storage_), 4U);  // Assuming 32-bit units.
+    storage_(storage),
+    number_of_bits_(start_bits) {
+  COMPILE_ASSERT(sizeof(*storage_) == kWordBytes, check_word_bytes);
+  COMPILE_ASSERT(sizeof(*storage_) * 8u == kWordBits, check_word_bits);
   if (storage_ == nullptr) {
     storage_size_ = BitsToWords(start_bits);
-    storage_ = static_cast<uint32_t*>(allocator_->Alloc(storage_size_ * sizeof(*storage_)));
+    storage_ = static_cast<uint32_t*>(allocator_->Alloc(storage_size_ * kWordBytes));
   }
 }
 
@@ -60,7 +62,7 @@
  */
 bool BitVector::IsBitSet(uint32_t num) const {
   // If the index is over the size:
-  if (num >= storage_size_ * sizeof(*storage_) * 8) {
+  if (num >= storage_size_ * kWordBits) {
     // Whether it is expandable or not, this bit does not exist: thus it is not set.
     return false;
   }
@@ -70,7 +72,7 @@
 
 // Mark all bits as "clear".
 void BitVector::ClearAllBits() {
-  memset(storage_, 0, storage_size_ * sizeof(*storage_));
+  memset(storage_, 0, storage_size_ * kWordBytes);
 }
 
 // Mark the specified bit as "set".
@@ -79,20 +81,21 @@
  * not using it badly or change resize mechanism.
  */
 void BitVector::SetBit(uint32_t num) {
-  if (num >= storage_size_ * sizeof(*storage_) * 8) {
+  if (num >= storage_size_ * kWordBits) {
     DCHECK(expandable_) << "Attempted to expand a non-expandable bitmap to position " << num;
 
     /* Round up to word boundaries for "num+1" bits */
     uint32_t new_size = BitsToWords(num + 1);
     DCHECK_GT(new_size, storage_size_);
     uint32_t *new_storage =
-        static_cast<uint32_t*>(allocator_->Alloc(new_size * sizeof(*storage_)));
-    memcpy(new_storage, storage_, storage_size_ * sizeof(*storage_));
+        static_cast<uint32_t*>(allocator_->Alloc(new_size * kWordBytes));
+    memcpy(new_storage, storage_, storage_size_ * kWordBytes);
     // Zero out the new storage words.
-    memset(&new_storage[storage_size_], 0, (new_size - storage_size_) * sizeof(*storage_));
+    memset(&new_storage[storage_size_], 0, (new_size - storage_size_) * kWordBytes);
     // TODO: collect stats on space wasted because of resize.
     storage_ = new_storage;
     storage_size_ = new_size;
+    number_of_bits_ = num;
   }
 
   storage_[num >> 5] |= check_masks[num & 0x1f];
@@ -101,7 +104,7 @@
 // Mark the specified bit as "unset".
 void BitVector::ClearBit(uint32_t num) {
   // If the index is over the size, we don't have to do anything, it is cleared.
-  if (num < storage_size_ * sizeof(*storage_) * 8) {
+  if (num < storage_size_ * kWordBits) {
     // Otherwise, go ahead and clear it.
     storage_[num >> 5] &= ~check_masks[num & 0x1f];
   }
@@ -113,23 +116,24 @@
 
   // If the highest bit set is different, we are different.
   if (our_highest != src_highest) {
-    return true;
+    return false;
   }
 
   // If the highest bit set is -1, both are cleared, we are the same.
   // If the highest bit set is 0, both have a unique bit set, we are the same.
-  if (our_highest >= 0) {
+  if (our_highest <= 0) {
     return true;
   }
 
-  // Get the highest bit set's cell's index.
-  int our_highest_index = (our_highest >> 5);
+  // Get the index of the storage cell holding the highest set bit.
+  // No need for "highest + 1" here: highest cannot be 0 at this point, so BitsToWords gives the right cell count.
+  int our_highest_index = BitsToWords(our_highest);
 
   // This memcmp is enough: we know that the highest bit set is the same for both:
   //   - Therefore, min_size goes up to at least that, we are thus comparing at least what we need to, but not less.
   //      ie. we are comparing all storage cells that could have difference, if both vectors have cells above our_highest_index,
   //          they are automatically at 0.
-  return (memcmp(storage_, src->GetRawStorage(), our_highest_index * sizeof(*storage_)) != 0);
+  return (memcmp(storage_, src->GetRawStorage(), our_highest_index * kWordBytes) == 0);
 }
 
 // Intersect with another bit vector.
@@ -156,13 +160,14 @@
 /*
  * Union with another bit vector.
  */
-void BitVector::Union(const BitVector* src) {
+bool BitVector::Union(const BitVector* src) {
   // Get the highest bit to determine how much we need to expand.
   int highest_bit = src->GetHighestBitSet();
+  bool changed = false;
 
   // If src has no bit set, we are done: there is no need for a union with src.
   if (highest_bit == -1) {
-    return;
+    return changed;
   }
 
   // Update src_size to how many cells we actually care about: where the bit is + 1.
@@ -170,16 +175,72 @@
 
   // Is the storage size smaller than src's?
   if (storage_size_ < src_size) {
+    changed = true;
+
     // Set it to reallocate.
     SetBit(highest_bit);
 
     // Paranoid: storage size should be big enough to hold this bit now.
-    DCHECK_LT(static_cast<uint32_t> (highest_bit), storage_size_ * sizeof(*(storage_)) * 8);
+    DCHECK_LT(static_cast<uint32_t> (highest_bit), storage_size_ * kWordBits);
   }
 
   for (uint32_t idx = 0; idx < src_size; idx++) {
-    storage_[idx] |= src->GetRawStorageWord(idx);
+    uint32_t existing = storage_[idx];
+    uint32_t update = existing | src->GetRawStorageWord(idx);
+    if (existing != update) {
+      changed = true;
+      storage_[idx] = update;
+    }
   }
+  return changed;
+}
+
+bool BitVector::UnionIfNotIn(const BitVector* union_with, const BitVector* not_in) {
+  // Get the highest bit to determine how much we need to expand.
+  int highest_bit = union_with->GetHighestBitSet();
+  bool changed = false;
+
+  // If union_with has no bit set, we are done: there is no need for a union with it.
+  if (highest_bit == -1) {
+    return changed;
+  }
+
+  // Update union_with_size to how many cells we actually care about: where the bit is + 1.
+  uint32_t union_with_size = BitsToWords(highest_bit + 1);
+
+  // Is the storage size smaller than union_with's?
+  if (storage_size_ < union_with_size) {
+    changed = true;
+
+    // Set it to reallocate.
+    SetBit(highest_bit);
+
+    // Paranoid: storage size should be big enough to hold this bit now.
+    DCHECK_LT(static_cast<uint32_t> (highest_bit), storage_size_ * kWordBits);
+  }
+
+  uint32_t not_in_size = not_in->GetStorageSize();
+
+  uint32_t idx = 0;
+  for (; idx < std::min(not_in_size, union_with_size); idx++) {
+    uint32_t existing = storage_[idx];
+    uint32_t update = existing |
+        (union_with->GetRawStorageWord(idx) & ~not_in->GetRawStorageWord(idx));
+    if (existing != update) {
+      changed = true;
+      storage_[idx] = update;
+    }
+  }
+
+  for (; idx < union_with_size; idx++) {
+    uint32_t existing = storage_[idx];
+    uint32_t update = existing | union_with->GetRawStorageWord(idx);
+    if (existing != update) {
+      changed = true;
+      storage_[idx] = update;
+    }
+  }
+  return changed;
 }
 
 void BitVector::Subtract(const BitVector *src) {
@@ -208,14 +269,10 @@
 
 // Count the number of bits that are set in range [0, end).
 uint32_t BitVector::NumSetBits(uint32_t end) const {
-  DCHECK_LE(end, storage_size_ * sizeof(*storage_) * 8);
+  DCHECK_LE(end, storage_size_ * kWordBits);
   return NumSetBits(storage_, end);
 }
 
-BitVector::Iterator* BitVector::GetIterator() const {
-  return new (allocator_) Iterator(this);
-}
-
 /*
  * Mark specified number of bits as "set". Cannot set all bits like ClearAll
  * since there might be unused bits - setting those to one will confuse the
@@ -269,7 +326,7 @@
       }
 
       // Return cnt + how many storage units still remain * the number of bits per unit.
-      int res = cnt + (idx * (sizeof(*storage_) * 8));
+      int res = cnt + (idx * kWordBits);
       return res;
     }
   }
@@ -309,14 +366,14 @@
   SetBit(highest_bit);
 
   // Now set until highest bit's storage.
-  uint32_t size = 1 + (highest_bit / (sizeof(*storage_) * 8));
-  memcpy(storage_, src->GetRawStorage(), sizeof(*storage_) * size);
+  uint32_t size = 1 + (highest_bit / kWordBits);
+  memcpy(storage_, src->GetRawStorage(), kWordBytes * size);
 
   // Set upper bits to 0.
   uint32_t left = storage_size_ - size;
 
   if (left > 0) {
-    memset(storage_ + size, 0, sizeof(*storage_) * left);
+    memset(storage_ + size, 0, kWordBytes * left);
   }
 }
 
@@ -339,16 +396,14 @@
   return count;
 }
 
-void BitVector::Dump(std::ostream& os, const char *prefix) {
+void BitVector::Dump(std::ostream& os, const char *prefix) const {
   std::ostringstream buffer;
-  DumpHelper(buffer, prefix);
-  os << buffer << std::endl;
+  DumpHelper(prefix, buffer);
+  os << buffer.str() << std::endl;
 }
 
-void BitVector::DumpDot(FILE* file, const char* prefix, bool last_entry) {
-  std::ostringstream buffer;
-  Dump(buffer, prefix);
 
+void BitVector::DumpDotHelper(bool last_entry, FILE* file, std::ostringstream& buffer) const {
   // Now print it to the file.
   fprintf(file, "    {%s}", buffer.str().c_str());
 
@@ -361,19 +416,42 @@
   fprintf(file, "\\\n");
 }
 
-void BitVector::DumpHelper(std::ostringstream& buffer, const char* prefix) {
+void BitVector::DumpDot(FILE* file, const char* prefix, bool last_entry) const {
+  std::ostringstream buffer;
+  DumpHelper(prefix, buffer);
+  DumpDotHelper(last_entry, file, buffer);
+}
+
+void BitVector::DumpIndicesDot(FILE* file, const char* prefix, bool last_entry) const {
+  std::ostringstream buffer;
+  DumpIndicesHelper(prefix, buffer);
+  DumpDotHelper(last_entry, file, buffer);
+}
+
+void BitVector::DumpIndicesHelper(const char* prefix, std::ostringstream& buffer) const {
   // Initialize it.
   if (prefix != nullptr) {
     buffer << prefix;
   }
 
-  int max = GetHighestBitSet();
-
-  for (int i = 0; i <= max; i++) {
+  for (size_t i = 0; i < number_of_bits_; i++) {
     if (IsBitSet(i)) {
       buffer << i << " ";
     }
   }
 }
 
+void BitVector::DumpHelper(const char* prefix, std::ostringstream& buffer) const {
+  // Initialize it.
+  if (prefix != nullptr) {
+    buffer << prefix;
+  }
+
+  buffer << '(';
+  for (size_t i = 0; i < number_of_bits_; i++) {
+    buffer << IsBitSet(i);
+  }
+  buffer << ')';
+}
+
 }  // namespace art
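
Union() now reports whether any bit actually changed, and the new UnionIfNotIn() folds in the bits of union_with that are not set in not_in; both return values are natural drivers for an iterative dataflow fixed point. A sketch of that use, assuming hypothetical BasicBlock fields (successors, live_in, live_out, uses, defs) that are not part of this change:

    bool changed = true;
    while (changed) {  // Iterate until no set changes: the fixed point.
      changed = false;
      for (BasicBlock* bb : blocks) {
        // live_out(bb) is the union of live_in over all successors.
        for (BasicBlock* succ : bb->successors) {
          changed |= bb->live_out->Union(succ->live_in);
        }
        // live_in(bb) = uses(bb) united with (live_out(bb) minus defs(bb)).
        changed |= bb->live_in->Union(bb->uses);
        changed |= bb->live_in->UnionIfNotIn(bb->live_out, bb->defs);
      }
    }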
diff --git a/runtime/base/bit_vector.h b/runtime/base/bit_vector.h
index db29c49..8f9afff 100644
--- a/runtime/base/bit_vector.h
+++ b/runtime/base/bit_vector.h
@@ -32,59 +32,115 @@
  */
 class BitVector {
   public:
-    class Iterator {
+    class IndexContainer;
+
+    /**
+     * @brief Convenient iterator across the indexes of the BitVector's set bits.
+     *
+     * @details IndexIterator is a Forward iterator (C++11: 24.2.5) from the lowest
+     * to the highest index of the BitVector's set bits. Instances can be retrieved
+     * only through BitVector::Indexes() which returns an IndexContainer wrapper
+     * object with begin() and end() suitable for range-based loops:
+     *   for (uint32_t idx : bit_vector.Indexes()) {
+     *     // Use idx.
+     *   }
+     */
+    class IndexIterator
+        : std::iterator<std::forward_iterator_tag, uint32_t, ptrdiff_t, void, uint32_t> {
       public:
-        explicit Iterator(const BitVector* bit_vector)
-          : p_bits_(bit_vector),
-            bit_storage_(bit_vector->GetRawStorage()),
-            bit_index_(0),
-            bit_size_(p_bits_->storage_size_ * sizeof(uint32_t) * 8) {}
-
-        // Return the position of the next set bit.  -1 means end-of-element reached.
-        int32_t Next() {
-          // Did anything obviously change since we started?
-          DCHECK_EQ(bit_size_, p_bits_->GetStorageSize() * sizeof(uint32_t) * 8);
-          DCHECK_EQ(bit_storage_, p_bits_->GetRawStorage());
-
-          if (UNLIKELY(bit_index_ >= bit_size_)) {
-            return -1;
-          }
-
-          uint32_t word_index = bit_index_ / 32;
-          uint32_t word = bit_storage_[word_index];
-          // Mask out any bits in the first word we've already considered.
-          word >>= bit_index_ & 0x1f;
-          if (word == 0) {
-            bit_index_ &= ~0x1f;
-            do {
-              word_index++;
-              if (UNLIKELY((word_index * 32) >= bit_size_)) {
-                bit_index_ = bit_size_;
-                return -1;
-              }
-              word = bit_storage_[word_index];
-              bit_index_ += 32;
-            } while (word == 0);
-          }
-          bit_index_ += CTZ(word) + 1;
-          return bit_index_ - 1;
+        bool operator==(const IndexIterator& other) const {
+          DCHECK(bit_storage_ == other.bit_storage_);
+          DCHECK_EQ(storage_size_, other.storage_size_);
+          return bit_index_ == other.bit_index_;
         }
 
-        static void* operator new(size_t size, Allocator* allocator) {
-          return allocator->Alloc(sizeof(BitVector::Iterator));
-        };
-        static void operator delete(void* p) {
-          Iterator* it = reinterpret_cast<Iterator*>(p);
-          it->p_bits_->allocator_->Free(p);
+        bool operator!=(const IndexIterator& other) const {
+          return !(*this == other);
+        }
+
+        int operator*() const {
+          DCHECK_LT(bit_index_, BitSize());
+          return bit_index_;
+        }
+
+        IndexIterator& operator++() {
+          DCHECK_LT(bit_index_, BitSize());
+          bit_index_ = FindIndex(bit_index_ + 1u);
+          return *this;
+        }
+
+        IndexIterator operator++(int) {
+          IndexIterator result(*this);
+          ++*this;
+          return result;
+        }
+
+        // Helper function to check for end without comparing with bit_vector.Indexes().end().
+        bool Done() const {
+          return bit_index_ == BitSize();
         }
 
       private:
-        const BitVector* const p_bits_;
-        const uint32_t* const bit_storage_;
-        uint32_t bit_index_;           // Current index (size in bits).
-        const uint32_t bit_size_;      // Size of vector in bits.
+        struct begin_tag { };
+        struct end_tag { };
 
-        friend class BitVector;
+        IndexIterator(const BitVector* bit_vector, begin_tag)
+          : bit_storage_(bit_vector->GetRawStorage()),
+            storage_size_(bit_vector->storage_size_),
+            bit_index_(FindIndex(0u)) { }
+
+        IndexIterator(const BitVector* bit_vector, end_tag)
+          : bit_storage_(bit_vector->GetRawStorage()),
+            storage_size_(bit_vector->storage_size_),
+            bit_index_(BitSize()) { }
+
+        uint32_t BitSize() const {
+          return storage_size_ * kWordBits;
+        }
+
+        uint32_t FindIndex(uint32_t start_index) const {
+          DCHECK_LE(start_index, BitSize());
+          uint32_t word_index = start_index / kWordBits;
+          if (UNLIKELY(word_index == storage_size_)) {
+            return start_index;
+          }
+          uint32_t word = bit_storage_[word_index];
+          // Mask out any bits in the first word we've already considered.
+          word &= static_cast<uint32_t>(-1) << (start_index & 0x1f);
+          while (word == 0u) {
+            ++word_index;
+            if (UNLIKELY(word_index == storage_size_)) {
+              return BitSize();
+            }
+            word = bit_storage_[word_index];
+          }
+          return word_index * 32u + CTZ(word);
+        }
+
+        const uint32_t* const bit_storage_;
+        const uint32_t storage_size_;  // Size of vector in words.
+        uint32_t bit_index_;           // Current index (size in bits).
+
+        friend class BitVector::IndexContainer;
+    };
+
+    /**
+     * @brief BitVector wrapper class for iteration across indexes of set bits.
+     */
+    class IndexContainer {
+     public:
+      explicit IndexContainer(const BitVector* bit_vector) : bit_vector_(bit_vector) { }
+
+      IndexIterator begin() const {
+        return IndexIterator(bit_vector_, IndexIterator::begin_tag());
+      }
+
+      IndexIterator end() const {
+        return IndexIterator(bit_vector_, IndexIterator::end_tag());
+      }
+
+     private:
+      const BitVector* const bit_vector_;
     };
 
     BitVector(uint32_t start_bits,
@@ -103,7 +159,11 @@
 
     void Copy(const BitVector* src);
     void Intersect(const BitVector* src2);
-    void Union(const BitVector* src);
+    bool Union(const BitVector* src);
+
+    // Set in this vector the bits of union_with that are not set in not_in.
+    bool UnionIfNotIn(const BitVector* union_with, const BitVector* not_in);
+
     void Subtract(const BitVector* src);
     // Are we equal to another bit vector?  Note: expandability attributes must also match.
     bool Equal(const BitVector* src) {
@@ -123,14 +183,16 @@
     // Number of bits set in range [0, end).
     uint32_t NumSetBits(uint32_t end) const;
 
-    Iterator* GetIterator() const;
+    IndexContainer Indexes() const {
+      return IndexContainer(this);
+    }
 
     uint32_t GetStorageSize() const { return storage_size_; }
     bool IsExpandable() const { return expandable_; }
     uint32_t GetRawStorageWord(size_t idx) const { return storage_[idx]; }
     uint32_t* GetRawStorage() { return storage_; }
     const uint32_t* GetRawStorage() const { return storage_; }
-    size_t GetSizeOf() const { return storage_size_ * sizeof(uint32_t); }
+    size_t GetSizeOf() const { return storage_size_ * kWordBytes; }
 
     /**
      * @return the highest bit set, -1 if none are set
@@ -144,17 +206,48 @@
 
     bool EnsureSizeAndClear(unsigned int num);
 
-    void Dump(std::ostream& os, const char* prefix);
-    void DumpDot(FILE* file, const char* prefix, bool last_entry = false);
+    void Dump(std::ostream& os, const char* prefix) const;
+
+    /**
+     * @brief Dump in .dot format; last_entry indicates whether this is the last entry dumped.
+     * @details If it is not the last entry, a "|" is appended to the dump.
+     */
+    void DumpDot(FILE* file, const char* prefix, bool last_entry = false) const;
+
+    /**
+     * @brief Dump the set bits' indices in .dot format; last_entry indicates whether this is the last entry dumped.
+     * @details If it is not the last entry, a "|" is appended to the dump.
+     */
+    void DumpIndicesDot(FILE* file, const char* prefix, bool last_entry = false) const;
 
   protected:
-    void DumpHelper(std::ostringstream& buffer, const char* prefix);
+    /**
+     * @brief Dump the bitvector into the buffer in a "00101..01" format.
+     * @param buffer the ostringstream used to dump the bitvector into.
+     */
+    void DumpHelper(const char* prefix, std::ostringstream& buffer) const;
+
+    /**
+     * @brief Dump the bitvector in a "1 2 5 8" format, where the numbers are the indices of the set bits.
+     * @param buffer the ostringstream used to dump the bitvector into.
+     */
+    void DumpIndicesHelper(const char* prefix, std::ostringstream& buffer) const;
+
+    /**
+     * @brief Wrapper to perform the bitvector dumping with the .dot format.
+     * @param buffer the ostringstream used to dump the bitvector into.
+     */
+    void DumpDotHelper(bool last_entry, FILE* file, std::ostringstream& buffer) const;
 
   private:
+    static constexpr uint32_t kWordBytes = sizeof(uint32_t);
+    static constexpr uint32_t kWordBits = kWordBytes * 8;
+
     Allocator* const allocator_;
     const bool expandable_;         // expand bitmap if we run out?
     uint32_t   storage_size_;       // current size, in 32-bit words.
     uint32_t*  storage_;
+    uint32_t number_of_bits_;
 };
 
 
diff --git a/runtime/base/bit_vector_test.cc b/runtime/base/bit_vector_test.cc
index 2ff55cb..1403f50 100644
--- a/runtime/base/bit_vector_test.cc
+++ b/runtime/base/bit_vector_test.cc
@@ -14,7 +14,8 @@
  * limitations under the License.
  */
 
-#include "UniquePtr.h"
+#include <memory>
+
 #include "bit_vector.h"
 #include "gtest/gtest.h"
 
@@ -37,11 +38,8 @@
   EXPECT_EQ(0U, bv.GetRawStorageWord(0));
   EXPECT_EQ(0U, *bv.GetRawStorage());
 
-  BitVector::Iterator empty_iterator(&bv);
-  EXPECT_EQ(-1, empty_iterator.Next());
-
-  UniquePtr<BitVector::Iterator> empty_iterator_on_heap(bv.GetIterator());
-  EXPECT_EQ(-1, empty_iterator_on_heap->Next());
+  EXPECT_TRUE(bv.Indexes().begin().Done());
+  EXPECT_TRUE(bv.Indexes().begin() == bv.Indexes().end());
 
   bv.SetBit(0);
   bv.SetBit(kBits - 1);
@@ -56,10 +54,14 @@
   EXPECT_EQ(0x80000001U, bv.GetRawStorageWord(0));
   EXPECT_EQ(0x80000001U, *bv.GetRawStorage());
 
-  BitVector::Iterator iterator(&bv);
-  EXPECT_EQ(0, iterator.Next());
-  EXPECT_EQ(static_cast<int>(kBits - 1), iterator.Next());
-  EXPECT_EQ(-1, iterator.Next());
+  BitVector::IndexIterator iterator = bv.Indexes().begin();
+  EXPECT_TRUE(iterator != bv.Indexes().end());
+  EXPECT_EQ(0, *iterator);
+  ++iterator;
+  EXPECT_TRUE(iterator != bv.Indexes().end());
+  EXPECT_EQ(static_cast<int>(kBits - 1), *iterator);
+  ++iterator;
+  EXPECT_TRUE(iterator == bv.Indexes().end());
 }
 
 TEST(BitVector, NoopAllocator) {
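
The rewritten test shows the migration for every former Iterator user: nothing is allocated, and the set bits are walked with a range-based loop over Indexes(). A minimal sketch for caller code, where Visit stands in for whatever the caller does per index:

    void Visit(uint32_t index);  // Placeholder, not part of this change.

    void VisitSetBits(const art::BitVector& bv) {
      // Replaces the old "for (int32_t i = it.Next(); i != -1; i = it.Next())" pattern.
      for (uint32_t idx : bv.Indexes()) {
        Visit(idx);
      }
    }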
diff --git a/runtime/base/histogram_test.cc b/runtime/base/histogram_test.cc
index 966b97f..454f2ab 100644
--- a/runtime/base/histogram_test.cc
+++ b/runtime/base/histogram_test.cc
@@ -14,11 +14,11 @@
  * limitations under the License.
  */
 
+#include <memory>
+#include <sstream>
+
 #include "gtest/gtest.h"
 #include "histogram-inl.h"
-#include "UniquePtr.h"
-
-#include <sstream>
 
 namespace art {
 
@@ -34,7 +34,7 @@
 //   PerValue = hist->PercentileVal(0.50); finds the 50th percentile(median).
 
 TEST(Histtest, MeanTest) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("MeanTest", 5));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("MeanTest", 5));
 
   double mean;
   for (size_t Idx = 0; Idx < 90; Idx++) {
@@ -52,7 +52,7 @@
 }
 
 TEST(Histtest, VarianceTest) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("VarianceTest", 5));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("VarianceTest", 5));
 
   double variance;
   hist->AddValue(9);
@@ -64,7 +64,7 @@
 }
 
 TEST(Histtest, Percentile) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("Percentile", 5));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("Percentile", 5));
   Histogram<uint64_t>::CumulativeData data;
 
   double PerValue;
@@ -91,7 +91,7 @@
 }
 
 TEST(Histtest, UpdateRange) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("UpdateRange", 5));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("UpdateRange", 5));
   Histogram<uint64_t>::CumulativeData data;
 
   double PerValue;
@@ -131,7 +131,7 @@
 }
 
 TEST(Histtest, Reset) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("Reset", 5));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("Reset", 5));
 
   double PerValue;
   hist->AddValue(0);
@@ -174,7 +174,7 @@
 }
 
 TEST(Histtest, MultipleCreateHist) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("MultipleCreateHist", 5));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("MultipleCreateHist", 5));
   Histogram<uint64_t>::CumulativeData data;
 
   double PerValue;
@@ -213,7 +213,7 @@
 }
 
 TEST(Histtest, SingleValue) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("SingleValue", 5));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("SingleValue", 5));
   Histogram<uint64_t>::CumulativeData data;
 
   hist->AddValue(1);
@@ -225,7 +225,7 @@
 }
 
 TEST(Histtest, CappingPercentiles) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("CappingPercentiles", 5));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("CappingPercentiles", 5));
   Histogram<uint64_t>::CumulativeData data;
 
   double per_995;
@@ -251,7 +251,7 @@
 }
 
 TEST(Histtest, SpikyValues) {
-  UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("SpikyValues", 5, 4096));
+  std::unique_ptr<Histogram<uint64_t>> hist(new Histogram<uint64_t>("SpikyValues", 5, 4096));
   Histogram<uint64_t>::CumulativeData data;
 
   for (uint64_t idx = 0ull; idx < 30ull; idx++) {
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 730a2c2..b2ad1d0 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -19,7 +19,6 @@
 #include "base/mutex.h"
 #include "runtime.h"
 #include "thread-inl.h"
-#include "UniquePtr.h"
 #include "utils.h"
 
 namespace art {
@@ -31,9 +30,9 @@
 unsigned int gAborting = 0;
 
 static LogSeverity gMinimumLogSeverity = INFO;
-static UniquePtr<std::string> gCmdLine;
-static UniquePtr<std::string> gProgramInvocationName;
-static UniquePtr<std::string> gProgramInvocationShortName;
+static std::unique_ptr<std::string> gCmdLine;
+static std::unique_ptr<std::string> gProgramInvocationName;
+static std::unique_ptr<std::string> gProgramInvocationShortName;
 
 const char* GetCmdLine() {
   return (gCmdLine.get() != nullptr) ? gCmdLine->c_str() : nullptr;
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index bd5ae85..814195c 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -20,12 +20,13 @@
 #include <cerrno>
 #include <cstring>
 #include <iostream>  // NOLINT
+#include <memory>
 #include <sstream>
 #include <signal.h>
 #include <vector>
+
 #include "base/macros.h"
 #include "log_severity.h"
-#include "UniquePtr.h"
 
 #define CHECK(x) \
   if (UNLIKELY(!(x))) \
@@ -202,7 +203,7 @@
  private:
   static void LogLine(const LogMessageData& data, const char*);
 
-  const UniquePtr<LogMessageData> data_;
+  const std::unique_ptr<LogMessageData> data_;
 
   friend void HandleUnexpectedSignal(int signal_number, siginfo_t* info, void* raw_context);
   friend class Mutex;
@@ -286,16 +287,18 @@
 // and the "-verbose:" command line argument.
 struct LogVerbosity {
   bool class_linker;  // Enabled with "-verbose:class".
-  bool verifier;
   bool compiler;
-  bool heap;
   bool gc;
+  bool heap;
   bool jdwp;
   bool jni;
   bool monitor;
+  bool profiler;
+  bool signals;
   bool startup;
   bool third_party_jni;  // Enabled with "-verbose:third-party-jni".
   bool threads;
+  bool verifier;
 };
 
 extern LogVerbosity gLogVerbosity;
diff --git a/runtime/base/macros.h b/runtime/base/macros.h
index 8175514..47571f8 100644
--- a/runtime/base/macros.h
+++ b/runtime/base/macros.h
@@ -169,7 +169,7 @@
 // bionic and glibc both have TEMP_FAILURE_RETRY, but Mac OS' libc doesn't.
 #ifndef TEMP_FAILURE_RETRY
 #define TEMP_FAILURE_RETRY(exp) ({ \
-  typeof(exp) _rc; \
+  decltype(exp) _rc; \
   do { \
     _rc = (exp); \
   } while (_rc == -1 && errno == EINTR); \
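
Switching typeof to decltype keeps TEMP_FAILURE_RETRY usable under strict -std=c++11, where the GNU typeof extension is unavailable; callers are unaffected. A small assumed caller, retrying a read() interrupted by a signal:

    #include <unistd.h>

    ssize_t ReadOnce(int fd, void* buf, size_t count) {
      // decltype(read(fd, buf, count)) deduces ssize_t for the macro's temporary.
      return TEMP_FAILURE_RETRY(read(fd, buf, count));
    }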
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index a7e25cb..a9472f7 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -132,9 +132,21 @@
   // TODO: tighten this check.
   if (kDebugLocking) {
     Runtime* runtime = Runtime::Current();
-    CHECK(runtime == NULL || !runtime->IsStarted() || runtime->IsShuttingDownLocked() ||
-          level == kDefaultMutexLevel  || level == kRuntimeShutdownLock ||
-          level == kThreadListLock || level == kLoggingLock || level == kAbortLock);
+    CHECK(runtime == nullptr || !runtime->IsStarted() || runtime->IsShuttingDownLocked() ||
+          // Used during thread creation to avoid races with runtime shutdown. Thread::Current not
+          // yet established.
+          level == kRuntimeShutdownLock ||
+          // Thread Ids are allocated/released before threads are established.
+          level == kAllocatedThreadIdsLock ||
+          // Thread LDT's are initialized without Thread::Current established.
+          level == kModifyLdtLock ||
+          // Threads are unregistered while holding the thread list lock; during this process they
+          // no longer exist, so we expect an unlock with no self.
+          level == kThreadListLock ||
+          // Ignore logging which may or may not have set up thread data structures.
+          level == kLoggingLock ||
+          // Avoid recursive death.
+          level == kAbortLock) << level;
   }
 }
 
@@ -221,7 +233,7 @@
       // Reduce state by 1.
       done = android_atomic_release_cas(cur_state, cur_state - 1, &state_) == 0;
       if (done && (cur_state - 1) == 0) {  // cas may fail due to noise?
-        if (num_pending_writers_ > 0 || num_pending_readers_ > 0) {
+        if (num_pending_writers_.LoadRelaxed() > 0 || num_pending_readers_ > 0) {
           // Wake any exclusive waiters as there are now no readers.
           futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0);
         }
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 2bc17bf..705be40 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -30,10 +30,12 @@
 namespace art {
 
 Mutex* Locks::abort_lock_ = nullptr;
+Mutex* Locks::allocated_thread_ids_lock_ = nullptr;
 Mutex* Locks::breakpoint_lock_ = nullptr;
 ReaderWriterMutex* Locks::classlinker_classes_lock_ = nullptr;
 ReaderWriterMutex* Locks::heap_bitmap_lock_ = nullptr;
 Mutex* Locks::logging_lock_ = nullptr;
+Mutex* Locks::modify_ldt_lock_ = nullptr;
 ReaderWriterMutex* Locks::mutator_lock_ = nullptr;
 Mutex* Locks::runtime_shutdown_lock_ = nullptr;
 Mutex* Locks::thread_list_lock_ = nullptr;
@@ -71,12 +73,12 @@
 class ScopedAllMutexesLock {
  public:
   explicit ScopedAllMutexesLock(const BaseMutex* mutex) : mutex_(mutex) {
-    while (!gAllMutexData->all_mutexes_guard.CompareAndSwap(0, mutex)) {
+    while (!gAllMutexData->all_mutexes_guard.CompareExchangeWeakAcquire(0, mutex)) {
       NanoSleep(100);
     }
   }
   ~ScopedAllMutexesLock() {
-    while (!gAllMutexData->all_mutexes_guard.CompareAndSwap(mutex_, 0)) {
+    while (!gAllMutexData->all_mutexes_guard.CompareExchangeWeakRelease(mutex_, 0)) {
       NanoSleep(100);
     }
   }
@@ -174,34 +176,34 @@
                                  uint64_t owner_tid,
                                  uint64_t nano_time_blocked) {
   if (kLogLockContentions) {
-    ContentionLogData* data = contetion_log_data_;
+    ContentionLogData* data = contention_log_data_;
     ++(data->contention_count);
     data->AddToWaitTime(nano_time_blocked);
     ContentionLogEntry* log = data->contention_log;
     // This code is intentionally racy as it is only used for diagnostics.
-    uint32_t slot = data->cur_content_log_entry;
+    uint32_t slot = data->cur_content_log_entry.LoadRelaxed();
     if (log[slot].blocked_tid == blocked_tid &&
         log[slot].owner_tid == blocked_tid) {
       ++log[slot].count;
     } else {
       uint32_t new_slot;
       do {
-        slot = data->cur_content_log_entry;
+        slot = data->cur_content_log_entry.LoadRelaxed();
         new_slot = (slot + 1) % kContentionLogSize;
-      } while (!data->cur_content_log_entry.CompareAndSwap(slot, new_slot));
+      } while (!data->cur_content_log_entry.CompareExchangeWeakRelaxed(slot, new_slot));
       log[new_slot].blocked_tid = blocked_tid;
       log[new_slot].owner_tid = owner_tid;
-      log[new_slot].count = 1;
+      log[new_slot].count.StoreRelaxed(1);
     }
   }
 }
 
 void BaseMutex::DumpContention(std::ostream& os) const {
   if (kLogLockContentions) {
-    const ContentionLogData* data = contetion_log_data_;
+    const ContentionLogData* data = contention_log_data_;
     const ContentionLogEntry* log = data->contention_log;
     uint64_t wait_time = data->wait_time;
-    uint32_t contention_count = data->contention_count;
+    uint32_t contention_count = data->contention_count.LoadRelaxed();
     if (contention_count == 0) {
       os << "never contended";
     } else {
@@ -213,7 +215,7 @@
       for (size_t i = 0; i < kContentionLogSize; ++i) {
         uint64_t blocked_tid = log[i].blocked_tid;
         uint64_t owner_tid = log[i].owner_tid;
-        uint32_t count = log[i].count;
+        uint32_t count = log[i].count.LoadRelaxed();
         if (count > 0) {
           auto it = most_common_blocked.find(blocked_tid);
           if (it != most_common_blocked.end()) {
@@ -261,7 +263,7 @@
 #if ART_USE_FUTEXES
   state_ = 0;
   exclusive_owner_ = 0;
-  num_contenders_ = 0;
+  DCHECK_EQ(0, num_contenders_.LoadRelaxed());
 #elif defined(__BIONIC__) || defined(__APPLE__)
   // Use recursive mutexes for bionic and Apple otherwise the
   // non-recursive mutexes don't have TIDs to check lock ownership of.
@@ -283,7 +285,8 @@
     LOG(shutting_down ? WARNING : FATAL) << "destroying mutex with owner: " << exclusive_owner_;
   } else {
     CHECK_EQ(exclusive_owner_, 0U)  << "unexpectedly found an owner on unlocked mutex " << name_;
-    CHECK_EQ(num_contenders_, 0) << "unexpectedly found a contender on mutex " << name_;
+    CHECK_EQ(num_contenders_.LoadRelaxed(), 0)
+        << "unexpectedly found a contender on mutex " << name_;
   }
 #else
   // We can't use CHECK_MUTEX_CALL here because on shutdown a suspended daemon thread
@@ -406,7 +409,7 @@
       done =  __sync_bool_compare_and_swap(&state_, cur_state, 0 /* new state */);
       if (LIKELY(done)) {  // Spurious fail?
         // Wake a contender
-        if (UNLIKELY(num_contenders_ > 0)) {
+        if (UNLIKELY(num_contenders_.LoadRelaxed() > 0)) {
           futex(&state_, FUTEX_WAKE, 1, NULL, NULL, 0);
         }
       }
@@ -459,7 +462,7 @@
   CHECK_EQ(state_, 0);
   CHECK_EQ(exclusive_owner_, 0U);
   CHECK_EQ(num_pending_readers_, 0);
-  CHECK_EQ(num_pending_writers_, 0);
+  CHECK_EQ(num_pending_writers_.LoadRelaxed(), 0);
 #else
   // We can't use CHECK_MUTEX_CALL here because on shutdown a suspended daemon thread
   // may still be using locks.
@@ -523,7 +526,7 @@
       done =  __sync_bool_compare_and_swap(&state_, -1 /* cur_state*/, 0 /* new state */);
       if (LIKELY(done)) {  // cmpxchg may fail due to noise?
         // Wake any waiters.
-        if (UNLIKELY(num_pending_readers_ > 0 || num_pending_writers_ > 0)) {
+        if (UNLIKELY(num_pending_readers_ > 0 || num_pending_writers_.LoadRelaxed() > 0)) {
           futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0);
         }
       }
@@ -646,7 +649,7 @@
 ConditionVariable::ConditionVariable(const char* name, Mutex& guard)
     : name_(name), guard_(guard) {
 #if ART_USE_FUTEXES
-  sequence_ = 0;
+  DCHECK_EQ(0, sequence_.LoadRelaxed());
   num_waiters_ = 0;
 #else
   pthread_condattr_t cond_attrs;
@@ -691,7 +694,7 @@
     sequence_++;  // Indicate the broadcast occurred.
     bool done = false;
     do {
-      int32_t cur_sequence = sequence_;
+      int32_t cur_sequence = sequence_.LoadRelaxed();
       // Requeue waiters onto mutex. The waiter holds the contender count on the mutex high ensuring
       // mutex unlocks will awaken the requeued waiter thread.
       done = futex(sequence_.Address(), FUTEX_CMP_REQUEUE, 0,
@@ -740,7 +743,7 @@
   // Ensure the Mutex is contended so that requeued threads are awoken.
   guard_.num_contenders_++;
   guard_.recursion_count_ = 1;
-  int32_t cur_sequence = sequence_;
+  int32_t cur_sequence = sequence_.LoadRelaxed();
   guard_.ExclusiveUnlock(self);
   if (futex(sequence_.Address(), FUTEX_WAIT, cur_sequence, NULL, NULL, 0) != 0) {
     // Futex failed, check it is an expected error.
@@ -754,7 +757,7 @@
   CHECK_GE(num_waiters_, 0);
   num_waiters_--;
   // We awoke and so no longer require awakes from the guard_'s unlock.
-  CHECK_GE(guard_.num_contenders_, 0);
+  CHECK_GE(guard_.num_contenders_.LoadRelaxed(), 0);
   guard_.num_contenders_--;
 #else
   guard_.recursion_count_ = 0;
@@ -775,7 +778,7 @@
   // Ensure the Mutex is contended so that requeued threads are awoken.
   guard_.num_contenders_++;
   guard_.recursion_count_ = 1;
-  int32_t cur_sequence = sequence_;
+  int32_t cur_sequence = sequence_.LoadRelaxed();
   guard_.ExclusiveUnlock(self);
   if (futex(sequence_.Address(), FUTEX_WAIT, cur_sequence, &rel_ts, NULL, 0) != 0) {
     if (errno == ETIMEDOUT) {
@@ -790,7 +793,7 @@
   CHECK_GE(num_waiters_, 0);
   num_waiters_--;
   // We awoke and so no longer require awakes from the guard_'s unlock.
-  CHECK_GE(guard_.num_contenders_, 0);
+  CHECK_GE(guard_.num_contenders_.LoadRelaxed(), 0);
   guard_.num_contenders_--;
 #else
 #if !defined(__APPLE__)
@@ -813,7 +816,13 @@
 void Locks::Init() {
   if (logging_lock_ != nullptr) {
     // Already initialized.
+    if (kRuntimeISA == kX86) {
+      DCHECK(modify_ldt_lock_ != nullptr);
+    } else {
+      DCHECK(modify_ldt_lock_ == nullptr);
+    }
     DCHECK(abort_lock_ != nullptr);
+    DCHECK(allocated_thread_ids_lock_ != nullptr);
     DCHECK(breakpoint_lock_ != nullptr);
     DCHECK(classlinker_classes_lock_ != nullptr);
     DCHECK(heap_bitmap_lock_ != nullptr);
@@ -826,32 +835,76 @@
     DCHECK(unexpected_signal_lock_ != nullptr);
     DCHECK(intern_table_lock_ != nullptr);
   } else {
-    logging_lock_ = new Mutex("logging lock", kLoggingLock, true);
-    abort_lock_ = new Mutex("abort lock", kAbortLock, true);
+    // Create global locks in level order from highest lock level to lowest.
+    LockLevel current_lock_level = kMutatorLock;
+    DCHECK(mutator_lock_ == nullptr);
+    mutator_lock_ = new ReaderWriterMutex("mutator lock", current_lock_level);
 
+    #define UPDATE_CURRENT_LOCK_LEVEL(new_level) \
+        DCHECK_LT(new_level, current_lock_level); \
+        current_lock_level = new_level;
+
+    UPDATE_CURRENT_LOCK_LEVEL(kHeapBitmapLock);
+    DCHECK(heap_bitmap_lock_ == nullptr);
+    heap_bitmap_lock_ = new ReaderWriterMutex("heap bitmap lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kRuntimeShutdownLock);
+    DCHECK(runtime_shutdown_lock_ == nullptr);
+    runtime_shutdown_lock_ = new Mutex("runtime shutdown lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kProfilerLock);
+    DCHECK(profiler_lock_ == nullptr);
+    profiler_lock_ = new Mutex("profiler lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kTraceLock);
+    DCHECK(trace_lock_ == nullptr);
+    trace_lock_ = new Mutex("trace lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kThreadListLock);
+    DCHECK(thread_list_lock_ == nullptr);
+    thread_list_lock_ = new Mutex("thread list lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kBreakpointLock);
     DCHECK(breakpoint_lock_ == nullptr);
-    breakpoint_lock_ = new Mutex("breakpoint lock", kBreakpointLock);
+    breakpoint_lock_ = new Mutex("breakpoint lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kClassLinkerClassesLock);
     DCHECK(classlinker_classes_lock_ == nullptr);
     classlinker_classes_lock_ = new ReaderWriterMutex("ClassLinker classes lock",
-                                                      kClassLinkerClassesLock);
-    DCHECK(heap_bitmap_lock_ == nullptr);
-    heap_bitmap_lock_ = new ReaderWriterMutex("heap bitmap lock", kHeapBitmapLock);
-    DCHECK(mutator_lock_ == nullptr);
-    mutator_lock_ = new ReaderWriterMutex("mutator lock", kMutatorLock);
-    DCHECK(runtime_shutdown_lock_ == nullptr);
-    runtime_shutdown_lock_ = new Mutex("runtime shutdown lock", kRuntimeShutdownLock);
-    DCHECK(thread_list_lock_ == nullptr);
-    thread_list_lock_ = new Mutex("thread list lock", kThreadListLock);
-    DCHECK(thread_suspend_count_lock_ == nullptr);
-    thread_suspend_count_lock_ = new Mutex("thread suspend count lock", kThreadSuspendCountLock);
-    DCHECK(trace_lock_ == nullptr);
-    trace_lock_ = new Mutex("trace lock", kTraceLock);
-    DCHECK(profiler_lock_ == nullptr);
-    profiler_lock_ = new Mutex("profiler lock", kProfilerLock);
-    DCHECK(unexpected_signal_lock_ == nullptr);
-    unexpected_signal_lock_ = new Mutex("unexpected signal lock", kUnexpectedSignalLock, true);
+                                                      current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kAllocatedThreadIdsLock);
+    DCHECK(allocated_thread_ids_lock_ == nullptr);
+    allocated_thread_ids_lock_ = new Mutex("allocated thread ids lock", current_lock_level);
+
+    if (kRuntimeISA == kX86) {
+      UPDATE_CURRENT_LOCK_LEVEL(kModifyLdtLock);
+      DCHECK(modify_ldt_lock_ == nullptr);
+      modify_ldt_lock_ = new Mutex("modify_ldt lock", current_lock_level);
+    }
+
+    UPDATE_CURRENT_LOCK_LEVEL(kInternTableLock);
     DCHECK(intern_table_lock_ == nullptr);
-    intern_table_lock_ = new Mutex("InternTable lock", kInternTableLock);
+    intern_table_lock_ = new Mutex("InternTable lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kAbortLock);
+    DCHECK(abort_lock_ == nullptr);
+    abort_lock_ = new Mutex("abort lock", current_lock_level, true);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kThreadSuspendCountLock);
+    DCHECK(thread_suspend_count_lock_ == nullptr);
+    thread_suspend_count_lock_ = new Mutex("thread suspend count lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kUnexpectedSignalLock);
+    DCHECK(unexpected_signal_lock_ == nullptr);
+    unexpected_signal_lock_ = new Mutex("unexpected signal lock", current_lock_level, true);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kLoggingLock);
+    DCHECK(logging_lock_ == nullptr);
+    logging_lock_ = new Mutex("logging lock", current_lock_level, true);
+
+    #undef UPDATE_CURRENT_LOCK_LEVEL
   }
 }
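
Locks::Init() now constructs every global lock in strictly descending LockLevel order, and the DCHECK_LT inside UPDATE_CURRENT_LOCK_LEVEL turns any ordering mistake into a startup failure. Adding a global lock therefore means splicing it into this chain at the point matching its level, roughly as below (kMyNewLock and my_new_lock_ are hypothetical):

    UPDATE_CURRENT_LOCK_LEVEL(kMyNewLock);  // DCHECK_LT fails if this level is not strictly lower.
    DCHECK(my_new_lock_ == nullptr);
    my_new_lock_ = new Mutex("my new lock", current_lock_level);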
 
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index b50c098..522692e 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -62,6 +62,7 @@
   kRosAllocBracketLock,
   kRosAllocBulkFreeLock,
   kAllocSpaceLock,
+  kReferenceProcessorLock,
   kDexFileMethodInlinerLock,
   kDexFileToMethodInlinerMapLock,
   kMarkSweepMarkStackLock,
@@ -73,6 +74,8 @@
   kPinTableLock,
   kLoadLibraryLock,
   kJdwpObjectRegistryLock,
+  kModifyLdtLock,
+  kAllocatedThreadIdsLock,
   kClassLinkerClassesLock,
   kBreakpointLock,
   kMonitorLock,
@@ -159,12 +162,12 @@
     void AddToWaitTime(uint64_t value);
     ContentionLogData() : wait_time(0) {}
   };
-  ContentionLogData contetion_log_data_[kContentionLogDataSize];
+  ContentionLogData contention_log_data_[kContentionLogDataSize];
 
  public:
   bool HasEverContended() const {
     if (kLogLockContentions) {
-      return contetion_log_data_->contention_count > 0;
+      return contention_log_data_->contention_count.LoadSequentiallyConsistent() > 0;
     }
     return false;
   }
@@ -531,28 +534,34 @@
   // Guards shutdown of the runtime.
   static Mutex* runtime_shutdown_lock_ ACQUIRED_AFTER(heap_bitmap_lock_);
 
+  // Guards background profiler global state.
+  static Mutex* profiler_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_);
+
+  // Guards trace (i.e. traceview) requests.
+  static Mutex* trace_lock_ ACQUIRED_AFTER(profiler_lock_);
+
   // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads
   // attaching and detaching.
-  static Mutex* thread_list_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_);
+  static Mutex* thread_list_lock_ ACQUIRED_AFTER(trace_lock_);
 
   // Guards breakpoints.
   static Mutex* breakpoint_lock_ ACQUIRED_AFTER(thread_list_lock_);
 
-  // Guards trace requests.
-  static Mutex* trace_lock_ ACQUIRED_AFTER(breakpoint_lock_);
-
-  // Guards profile objects.
-  static Mutex* profiler_lock_ ACQUIRED_AFTER(trace_lock_);
-
   // Guards lists of classes within the class linker.
-  static ReaderWriterMutex* classlinker_classes_lock_ ACQUIRED_AFTER(profiler_lock_);
+  static ReaderWriterMutex* classlinker_classes_lock_ ACQUIRED_AFTER(breakpoint_lock_);
 
   // When declaring any Mutex add DEFAULT_MUTEX_ACQUIRED_AFTER to use annotalysis to check the code
   // doesn't try to hold a higher level Mutex.
   #define DEFAULT_MUTEX_ACQUIRED_AFTER ACQUIRED_AFTER(Locks::classlinker_classes_lock_)
 
+  // Guards the allocation/deallocation of thread ids.
+  static Mutex* allocated_thread_ids_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
+
+  // Guards modification of the LDT on x86.
+  static Mutex* modify_ldt_lock_ ACQUIRED_AFTER(allocated_thread_ids_lock_);
+
   // Guards intern table.
-  static Mutex* intern_table_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
+  static Mutex* intern_table_lock_ ACQUIRED_AFTER(modify_ldt_lock_);
 
   // Have an exclusive aborting thread.
   static Mutex* abort_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
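
Locks that sit below classlinker_classes_lock_ but are not listed individually keep using DEFAULT_MUTEX_ACQUIRED_AFTER, so annotalysis can still verify that no higher-level lock is taken while they are held. A sketch of a subsystem declaring such a lock (the class and lock names are assumptions, not part of this change):

    class MySubsystem {
     public:
      MySubsystem() : work_lock_("my subsystem work lock") {}

      void DoWork(Thread* self) {
        MutexLock mu(self, work_lock_);  // Ordered after classlinker_classes_lock_ via the annotation.
        // ... mutate subsystem state ...
      }

     private:
      Mutex work_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
    };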
diff --git a/runtime/base/unix_file/random_access_file_test.h b/runtime/base/unix_file/random_access_file_test.h
index 8a6605e..1d0b866 100644
--- a/runtime/base/unix_file/random_access_file_test.h
+++ b/runtime/base/unix_file/random_access_file_test.h
@@ -18,11 +18,10 @@
 #define ART_RUNTIME_BASE_UNIX_FILE_RANDOM_ACCESS_FILE_TEST_H_
 
 #include <errno.h>
-
+#include <memory>
 #include <string>
 
 #include "common_runtime_test.h"
-#include "UniquePtr.h"
 
 namespace unix_file {
 
@@ -62,7 +61,7 @@
 
   void TestRead() {
     char buf[256];
-    UniquePtr<RandomAccessFile> file(MakeTestFile());
+    std::unique_ptr<RandomAccessFile> file(MakeTestFile());
 
     // Reading from the start of an empty file gets you zero bytes, however many
     // you ask for.
@@ -77,7 +76,7 @@
 
   void TestReadContent(const std::string& content, RandomAccessFile* file) {
     const int buf_size = content.size() + 10;
-    UniquePtr<char> buf(new char[buf_size]);
+    std::unique_ptr<char> buf(new char[buf_size]);
     // Can't read from a negative offset.
     ASSERT_EQ(-EINVAL, file->Read(buf.get(), 0, -123));
 
@@ -107,7 +106,7 @@
 
   void TestSetLength() {
     const std::string content("hello");
-    UniquePtr<RandomAccessFile> file(MakeTestFile());
+    std::unique_ptr<RandomAccessFile> file(MakeTestFile());
     ASSERT_EQ(content.size(), static_cast<uint64_t>(file->Write(content.data(), content.size(), 0)));
     ASSERT_EQ(content.size(), static_cast<uint64_t>(file->GetLength()));
 
@@ -132,7 +131,7 @@
 
   void TestWrite() {
     const std::string content("hello");
-    UniquePtr<RandomAccessFile> file(MakeTestFile());
+    std::unique_ptr<RandomAccessFile> file(MakeTestFile());
 
     // Can't write to a negative offset.
     ASSERT_EQ(-EINVAL, file->Write(content.data(), 0, -123));
diff --git a/runtime/catch_block_stack_visitor.cc b/runtime/catch_block_stack_visitor.cc
index 8d10a97..55b330a 100644
--- a/runtime/catch_block_stack_visitor.cc
+++ b/runtime/catch_block_stack_visitor.cc
@@ -19,7 +19,7 @@
 #include "dex_instruction.h"
 #include "mirror/art_method-inl.h"
 #include "quick_exception_handler.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 #include "verifier/method_verifier.h"
 
 namespace art {
@@ -50,7 +50,17 @@
   }
   if (dex_pc != DexFile::kDexNoIndex) {
     bool clear_exception = false;
-    uint32_t found_dex_pc = method->FindCatchBlock(to_find_, dex_pc, &clear_exception);
+    bool exc_changed = false;
+    StackHandleScope<1> hs(Thread::Current());
+    Handle<mirror::Class> to_find(hs.NewHandle((*exception_)->GetClass()));
+    uint32_t found_dex_pc = method->FindCatchBlock(to_find, dex_pc, &clear_exception,
+                                                   &exc_changed);
+    if (UNLIKELY(exc_changed)) {
+      DCHECK_EQ(DexFile::kDexNoIndex, found_dex_pc);
+      exception_->Assign(self_->GetException(nullptr));  // TODO: Throw location?
+      // There is a new context installed, delete it.
+      delete self_->GetLongJumpContext();
+    }
     exception_handler_->SetClearException(clear_exception);
     if (found_dex_pc != DexFile::kDexNoIndex) {
       exception_handler_->SetHandlerDexPc(found_dex_pc);
diff --git a/runtime/catch_block_stack_visitor.h b/runtime/catch_block_stack_visitor.h
index 6f0fe11..f45cf03 100644
--- a/runtime/catch_block_stack_visitor.h
+++ b/runtime/catch_block_stack_visitor.h
@@ -19,7 +19,7 @@
 
 #include "mirror/object-inl.h"
 #include "stack.h"
-#include "sirt_ref-inl.h"
+#include "handle_scope-inl.h"
 
 namespace art {
 
@@ -34,10 +34,10 @@
 // Finds catch handler or prepares deoptimization.
 class CatchBlockStackVisitor FINAL : public StackVisitor {
  public:
-  CatchBlockStackVisitor(Thread* self, Context* context, SirtRef<mirror::Throwable>& exception,
+  CatchBlockStackVisitor(Thread* self, Context* context, Handle<mirror::Throwable>* exception,
                          QuickExceptionHandler* exception_handler)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(self, context), self_(self), to_find_(self, exception->GetClass()),
+      : StackVisitor(self, context), self_(self), exception_(exception),
         exception_handler_(exception_handler) {
   }
 
@@ -48,7 +48,7 @@
 
   Thread* const self_;
   // The type of the exception catch block to find.
-  SirtRef<mirror::Class> to_find_;
+  Handle<mirror::Throwable>* exception_;
   QuickExceptionHandler* const exception_handler_;
 
   DISALLOW_COPY_AND_ASSIGN(CatchBlockStackVisitor);
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index 3df050e..cfd0c00 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -86,9 +86,9 @@
  * ===========================================================================
  */
 
-static bool IsSirtLocalRef(JNIEnv* env, jobject localRef) {
-  return GetIndirectRefKind(localRef) == kSirtOrInvalid &&
-      reinterpret_cast<JNIEnvExt*>(env)->self->SirtContains(localRef);
+static bool IsHandleScopeLocalRef(JNIEnv* env, jobject localRef) {
+  return GetIndirectRefKind(localRef) == kHandleScopeOrInvalid &&
+      reinterpret_cast<JNIEnvExt*>(env)->self->HandleScopeContains(localRef);
 }
 
 // Flags passed into ScopedCheck.
@@ -179,7 +179,7 @@
   // times, so using "java.lang.Thread" instead of "java/lang/Thread" might work in some
   // circumstances, but this is incorrect.
   void CheckClassName(const char* class_name) {
-    if (!IsValidJniClassName(class_name)) {
+    if ((class_name == nullptr) || !IsValidJniClassName(class_name)) {
       JniAbortF(function_name_,
                 "illegal class name '%s'\n"
                 "    (should be of the form 'package/Class', [Lpackage/Class;' or '[[B')",
@@ -1243,7 +1243,7 @@
 
   static void DeleteLocalRef(JNIEnv* env, jobject localRef) {
     CHECK_JNI_ENTRY(kFlag_Default | kFlag_ExcepOkay, "EL", env, localRef);
-    if (localRef != nullptr && GetIndirectRefKind(localRef) != kLocal && !IsSirtLocalRef(env, localRef)) {
+    if (localRef != nullptr && GetIndirectRefKind(localRef) != kLocal && !IsHandleScopeLocalRef(env, localRef)) {
       JniAbortF(__FUNCTION__, "DeleteLocalRef on %s: %p",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(localRef)).c_str(), localRef);
     } else {
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 6c53563..84afb2d 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -24,7 +24,7 @@
 #include "mirror/iftable.h"
 #include "mirror/object_array.h"
 #include "object_utils.h"
-#include "sirt_ref-inl.h"
+#include "handle_scope-inl.h"
 
 namespace art {
 
@@ -34,8 +34,7 @@
 }
 
 inline mirror::Class* ClassLinker::FindSystemClass(Thread* self, const char* descriptor) {
-  SirtRef<mirror::ClassLoader> class_loader(self, nullptr);
-  return FindClass(self, descriptor, class_loader);
+  return FindClass(self, descriptor, NullHandle<mirror::ClassLoader>());
 }
 
 inline mirror::Class* ClassLinker::FindArrayClass(Thread* self, mirror::Class* element_class) {
@@ -48,8 +47,9 @@
   }
   DCHECK(!element_class->IsPrimitiveVoid());
   std::string descriptor("[");
-  descriptor += ClassHelper(element_class).GetDescriptor();
-  SirtRef<mirror::ClassLoader> class_loader(self, element_class->GetClassLoader());
+  descriptor += element_class->GetDescriptor();
+  StackHandleScope<1> hs(Thread::Current());
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(element_class->GetClassLoader()));
   mirror::Class* array_class = FindClass(self, descriptor.c_str(), class_loader);
   // Benign races in storing array class and incrementing index.
   size_t victim_index = find_array_class_cache_next_victim_;
@@ -63,9 +63,13 @@
   mirror::String* resolved_string = referrer->GetDexCacheStrings()->Get(string_idx);
   if (UNLIKELY(resolved_string == NULL)) {
     mirror::Class* declaring_class = referrer->GetDeclaringClass();
-    SirtRef<mirror::DexCache> dex_cache(Thread::Current(), declaring_class->GetDexCache());
+    StackHandleScope<1> hs(Thread::Current());
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_string = ResolveString(dex_file, string_idx, dex_cache);
+    if (resolved_string != nullptr) {
+      DCHECK_EQ(dex_cache->GetResolvedString(string_idx), resolved_string);
+    }
   }
   return resolved_string;
 }
@@ -73,13 +77,16 @@
 inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx,
                                                mirror::ArtMethod* referrer) {
   mirror::Class* resolved_type = referrer->GetDexCacheResolvedTypes()->Get(type_idx);
-  if (UNLIKELY(resolved_type == NULL)) {
+  if (UNLIKELY(resolved_type == nullptr)) {
     mirror::Class* declaring_class = referrer->GetDeclaringClass();
-    Thread* self = Thread::Current();
-    SirtRef<mirror::DexCache> dex_cache(self, declaring_class->GetDexCache());
-    SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
+    StackHandleScope<2> hs(Thread::Current());
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
+    Handle<mirror::ClassLoader> class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_type = ResolveType(dex_file, type_idx, dex_cache, class_loader);
+    if (resolved_type != nullptr) {
+      DCHECK_EQ(dex_cache->GetResolvedType(type_idx), resolved_type);
+    }
   }
   return resolved_type;
 }
@@ -89,43 +96,68 @@
   mirror::DexCache* dex_cache_ptr = declaring_class->GetDexCache();
   mirror::Class* resolved_type = dex_cache_ptr->GetResolvedType(type_idx);
   if (UNLIKELY(resolved_type == NULL)) {
-    Thread* self = Thread::Current();
-    SirtRef<mirror::DexCache> dex_cache(self, dex_cache_ptr);
-    SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
+    StackHandleScope<2> hs(Thread::Current());
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(dex_cache_ptr));
+    Handle<mirror::ClassLoader> class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_type = ResolveType(dex_file, type_idx, dex_cache, class_loader);
+    if (resolved_type != nullptr) {
+      DCHECK_EQ(dex_cache->GetResolvedType(type_idx), resolved_type);
+    }
   }
   return resolved_type;
 }
 
-inline mirror::ArtMethod* ClassLinker::ResolveMethod(uint32_t method_idx,
-                                                     mirror::ArtMethod* referrer,
-                                                     InvokeType type) {
+inline mirror::ArtMethod* ClassLinker::GetResolvedMethod(uint32_t method_idx,
+                                                         mirror::ArtMethod* referrer,
+                                                         InvokeType type) {
   mirror::ArtMethod* resolved_method =
       referrer->GetDexCacheResolvedMethods()->Get(method_idx);
-  if (UNLIKELY(resolved_method == NULL || resolved_method->IsRuntimeMethod())) {
-    mirror::Class* declaring_class = referrer->GetDeclaringClass();
-    Thread* self = Thread::Current();
-    SirtRef<mirror::DexCache> dex_cache(self, declaring_class->GetDexCache());
-    SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
-    const DexFile& dex_file = *dex_cache->GetDexFile();
-    resolved_method = ResolveMethod(dex_file, method_idx, dex_cache, class_loader, referrer, type);
+  if (resolved_method == nullptr || resolved_method->IsRuntimeMethod()) {
+    return nullptr;
   }
   return resolved_method;
 }
 
-inline mirror::ArtField* ClassLinker::ResolveField(uint32_t field_idx,
-                                                   mirror::ArtMethod* referrer,
+inline mirror::ArtMethod* ClassLinker::ResolveMethod(Thread* self, uint32_t method_idx,
+                                                     mirror::ArtMethod** referrer,
+                                                     InvokeType type) {
+  mirror::ArtMethod* resolved_method = GetResolvedMethod(method_idx, *referrer, type);
+  if (LIKELY(resolved_method != nullptr)) {
+    return resolved_method;
+  }
+  mirror::Class* declaring_class = (*referrer)->GetDeclaringClass();
+  StackHandleScope<3> hs(self);
+  Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
+  Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
+  HandleWrapper<mirror::ArtMethod> h_referrer(hs.NewHandleWrapper(referrer));
+  const DexFile* dex_file = h_dex_cache->GetDexFile();
+  resolved_method = ResolveMethod(*dex_file, method_idx, h_dex_cache, h_class_loader, h_referrer,
+                                  type);
+  if (resolved_method != nullptr) {
+    DCHECK_EQ(h_dex_cache->GetResolvedMethod(method_idx), resolved_method);
+  }
+  return resolved_method;
+}
+
+inline mirror::ArtField* ClassLinker::GetResolvedField(uint32_t field_idx,
+                                                       mirror::Class* field_declaring_class) {
+  return field_declaring_class->GetDexCache()->GetResolvedField(field_idx);
+}
+
+inline mirror::ArtField* ClassLinker::ResolveField(uint32_t field_idx, mirror::ArtMethod* referrer,
                                                    bool is_static) {
   mirror::Class* declaring_class = referrer->GetDeclaringClass();
-  mirror::ArtField* resolved_field =
-      declaring_class->GetDexCache()->GetResolvedField(field_idx);
+  mirror::ArtField* resolved_field = GetResolvedField(field_idx, declaring_class);
   if (UNLIKELY(resolved_field == NULL)) {
-    Thread* self = Thread::Current();
-    SirtRef<mirror::DexCache>  dex_cache(self, declaring_class->GetDexCache());
-    SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
+    StackHandleScope<2> hs(Thread::Current());
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
+    Handle<mirror::ClassLoader> class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_field = ResolveField(dex_file, field_idx, dex_cache, class_loader, is_static);
+    if (resolved_field != nullptr) {
+      DCHECK_EQ(dex_cache->GetResolvedField(field_idx), resolved_field);
+    }
   }
   return resolved_field;
 }
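
    Note: the hunk above separates a cache-only fast path from a resolving slow path. GetResolvedMethod
    only probes the referrer's dex-cache array and reports a miss as nullptr; ResolveMethod takes a
    Thread* and an ArtMethod** so it can root the referrer in a handle scope before doing work that may
    allocate or suspend, then re-checks the cache. A minimal standalone sketch of that shape follows;
    the types below are stand-ins, not the ART classes.

    #include <cassert>
    #include <cstdint>
    #include <unordered_map>

    struct Method { bool is_runtime_method = false; };

    class Resolver {
     public:
      // Fast path: cache lookup only; never allocates, never suspends.
      Method* GetResolved(uint32_t idx) const {
        auto it = cache_.find(idx);
        return (it == cache_.end() || it->second->is_runtime_method) ? nullptr : it->second;
      }

      // Slow path: resolve, fill the cache, then verify it (mirrors the DCHECK_EQ above).
      Method* Resolve(uint32_t idx) {
        if (Method* fast = GetResolved(idx)) {
          return fast;                        // The LIKELY early return in the real code.
        }
        Method* resolved = SlowResolve(idx);  // In ART this step may allocate and move objects.
        if (resolved != nullptr) {
          cache_[idx] = resolved;
          assert(GetResolved(idx) == resolved);
        }
        return resolved;
      }

     private:
      Method* SlowResolve(uint32_t idx) { static Method m; return (idx != 0) ? &m : nullptr; }
      std::unordered_map<uint32_t, Method*> cache_;
    };
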
@@ -159,9 +191,8 @@
 
 inline mirror::ObjectArray<mirror::ArtField>* ClassLinker::AllocArtFieldArray(Thread* self,
                                                                               size_t length) {
-  return mirror::ObjectArray<mirror::ArtField>::Alloc(self,
-                                                      GetClassRoot(kJavaLangReflectArtFieldArrayClass),
-                                                      length);
+  return mirror::ObjectArray<mirror::ArtField>::Alloc(
+      self, GetClassRoot(kJavaLangReflectArtFieldArrayClass), length);
 }
 
 inline mirror::Class* ClassLinker::GetClassRoot(ClassRoot class_root)
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 703229c..afff7a2 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -19,8 +19,8 @@
 #include <fcntl.h>
 #include <sys/file.h>
 #include <sys/stat.h>
-
 #include <deque>
+#include <memory>
 #include <string>
 #include <utility>
 #include <vector>
@@ -37,6 +37,7 @@
 #include "gc/accounting/heap_bitmap.h"
 #include "gc/heap.h"
 #include "gc/space/image_space.h"
+#include "handle_scope.h"
 #include "intern_table.h"
 #include "interpreter/interpreter.h"
 #include "leb128.h"
@@ -59,10 +60,8 @@
 #include "entrypoints/entrypoint_utils.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
-#include "sirt_ref.h"
-#include "stack_indirect_reference_table.h"
+#include "handle_scope-inl.h"
 #include "thread.h"
-#include "UniquePtr.h"
 #include "utils.h"
 #include "verifier/method_verifier.h"
 #include "well_known_classes.h"
@@ -96,8 +95,8 @@
   ThrowLocation throw_location = self->GetCurrentLocationForThrow();
   if (c->GetVerifyErrorClass() != NULL) {
     // TODO: change the verifier to store an _instance_, with a useful detail message?
-    ClassHelper ve_ch(c->GetVerifyErrorClass());
-    self->ThrowNewException(throw_location, ve_ch.GetDescriptor(), PrettyDescriptor(c).c_str());
+    self->ThrowNewException(throw_location, c->GetVerifyErrorClass()->GetDescriptor().c_str(),
+                            PrettyDescriptor(c).c_str());
   } else {
     self->ThrowNewException(throw_location, "Ljava/lang/NoClassDefFoundError;",
                             PrettyDescriptor(c).c_str());
@@ -191,6 +190,9 @@
   memset(find_array_class_cache_, 0, kFindArrayCacheSize * sizeof(mirror::Class*));
 }
 
+// To set a value for generic JNI. May be necessary in compiler tests.
+extern "C" void art_quick_generic_jni_trampoline(mirror::ArtMethod*);
+
 void ClassLinker::InitFromCompiler(const std::vector<const DexFile*>& boot_class_path) {
   VLOG(startup) << "ClassLinker::Init";
   CHECK(Runtime::Current()->IsCompiler());
@@ -202,11 +204,13 @@
   gc::Heap* heap = Runtime::Current()->GetHeap();
   // The GC can't handle an object with a null class since we can't get the size of this object.
   heap->IncrementDisableMovingGC(self);
-  SirtRef<mirror::Class> java_lang_Class(self, down_cast<mirror::Class*>(
-      heap->AllocNonMovableObject<true>(self, nullptr, sizeof(mirror::ClassClass), VoidFunctor())));
-  CHECK(java_lang_Class.get() != NULL);
-  mirror::Class::SetClassClass(java_lang_Class.get());
-  java_lang_Class->SetClass(java_lang_Class.get());
+  StackHandleScope<64> hs(self);  // 64 is picked arbitrarily.
+  Handle<mirror::Class> java_lang_Class(hs.NewHandle(down_cast<mirror::Class*>(
+      heap->AllocNonMovableObject<true>(self, nullptr, sizeof(mirror::ClassClass),
+                                        VoidFunctor()))));
+  CHECK(java_lang_Class.Get() != NULL);
+  mirror::Class::SetClassClass(java_lang_Class.Get());
+  java_lang_Class->SetClass(java_lang_Class.Get());
   if (kUseBakerOrBrooksReadBarrier) {
     java_lang_Class->AssertReadBarrierPointer();
   }
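
    Note: each SirtRef paired one object with the current thread, while the replacement pre-sizes a
    single StackHandleScope on the stack (the <64> only needs to be at least the number of
    NewHandle/NewHandleWrapper calls made in the function) and hands out Handle slots whose contents a
    moving collector can update. A simplified model of that shape, assuming nothing about the real ART
    types:

    #include <cassert>
    #include <cstddef>

    // Reads and writes go through a slot owned by the scope, so anything that rewrites the
    // slot (a moving GC in the real runtime) is observed by later Get() calls.
    template <typename T>
    class Handle {
     public:
      explicit Handle(T** slot) : slot_(slot) {}
      T* Get() const { return *slot_; }
      void Assign(T* obj) { *slot_ = obj; }
     private:
      T** slot_;
    };

    // Fixed-capacity scope living on the caller's stack; kNumSlots plays the role of the
    // template argument in StackHandleScope<64>.
    template <size_t kNumSlots, typename T>
    class FixedHandleScope {
     public:
      Handle<T> NewHandle(T* obj) {
        assert(count_ < kNumSlots);  // Tripping this means the scope was sized too small.
        slots_[count_] = obj;
        return Handle<T>(&slots_[count_++]);
      }
     private:
      T* slots_[kNumSlots] = {};
      size_t count_ = 0;
    };
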
@@ -215,44 +219,50 @@
   // AllocClass(mirror::Class*) can now be used
 
   // Class[] is used for reflection support.
-  SirtRef<mirror::Class> class_array_class(self, AllocClass(self, java_lang_Class.get(), sizeof(mirror::Class)));
-  class_array_class->SetComponentType(java_lang_Class.get());
+  Handle<mirror::Class> class_array_class(
+      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::Class))));
+  class_array_class->SetComponentType(java_lang_Class.Get());
 
   // java_lang_Object comes next so that object_array_class can be created.
-  SirtRef<mirror::Class> java_lang_Object(self, AllocClass(self, java_lang_Class.get(), sizeof(mirror::Class)));
-  CHECK(java_lang_Object.get() != NULL);
+  Handle<mirror::Class> java_lang_Object(
+      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::Class))));
+  CHECK(java_lang_Object.Get() != NULL);
   // backfill Object as the super class of Class.
-  java_lang_Class->SetSuperClass(java_lang_Object.get());
+  java_lang_Class->SetSuperClass(java_lang_Object.Get());
   java_lang_Object->SetStatus(mirror::Class::kStatusLoaded, self);
 
   // Object[] next to hold class roots.
-  SirtRef<mirror::Class> object_array_class(self, AllocClass(self, java_lang_Class.get(), sizeof(mirror::Class)));
-  object_array_class->SetComponentType(java_lang_Object.get());
+  Handle<mirror::Class> object_array_class(
+      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::Class))));
+  object_array_class->SetComponentType(java_lang_Object.Get());
 
   // Setup the char class to be used for char[].
-  SirtRef<mirror::Class> char_class(self, AllocClass(self, java_lang_Class.get(), sizeof(mirror::Class)));
+  Handle<mirror::Class> char_class(hs.NewHandle(AllocClass(self, java_lang_Class.Get(),
+                                                           sizeof(mirror::Class))));
 
   // Setup the char[] class to be used for String.
-  SirtRef<mirror::Class> char_array_class(self, AllocClass(self, java_lang_Class.get(), sizeof(mirror::Class)));
-  char_array_class->SetComponentType(char_class.get());
-  mirror::CharArray::SetArrayClass(char_array_class.get());
+  Handle<mirror::Class> char_array_class(hs.NewHandle(AllocClass(self, java_lang_Class.Get(),
+                                                                 sizeof(mirror::Class))));
+  char_array_class->SetComponentType(char_class.Get());
+  mirror::CharArray::SetArrayClass(char_array_class.Get());
 
   // Setup String.
-  SirtRef<mirror::Class> java_lang_String(self, AllocClass(self, java_lang_Class.get(), sizeof(mirror::StringClass)));
-  mirror::String::SetClass(java_lang_String.get());
+  Handle<mirror::Class> java_lang_String(hs.NewHandle(AllocClass(self, java_lang_Class.Get(),
+                                                                 sizeof(mirror::StringClass))));
+  mirror::String::SetClass(java_lang_String.Get());
   java_lang_String->SetObjectSize(sizeof(mirror::String));
   java_lang_String->SetStatus(mirror::Class::kStatusResolved, self);
 
   // Create storage for root classes, save away our work so far (requires descriptors).
-  class_roots_ = mirror::ObjectArray<mirror::Class>::Alloc(self, object_array_class.get(),
+  class_roots_ = mirror::ObjectArray<mirror::Class>::Alloc(self, object_array_class.Get(),
                                                            kClassRootsMax);
   CHECK(class_roots_ != NULL);
-  SetClassRoot(kJavaLangClass, java_lang_Class.get());
-  SetClassRoot(kJavaLangObject, java_lang_Object.get());
-  SetClassRoot(kClassArrayClass, class_array_class.get());
-  SetClassRoot(kObjectArrayClass, object_array_class.get());
-  SetClassRoot(kCharArrayClass, char_array_class.get());
-  SetClassRoot(kJavaLangString, java_lang_String.get());
+  SetClassRoot(kJavaLangClass, java_lang_Class.Get());
+  SetClassRoot(kJavaLangObject, java_lang_Object.Get());
+  SetClassRoot(kClassArrayClass, class_array_class.Get());
+  SetClassRoot(kObjectArrayClass, object_array_class.Get());
+  SetClassRoot(kCharArrayClass, char_array_class.Get());
+  SetClassRoot(kJavaLangString, java_lang_String.Get());
 
   // Setup the primitive type classes.
   SetClassRoot(kPrimitiveBoolean, CreatePrimitiveClass(self, Primitive::kPrimBoolean));
@@ -268,53 +278,55 @@
   array_iftable_ = AllocIfTable(self, 2);
 
   // Create int array type for AllocDexCache (done in AppendToBootClassPath).
-  SirtRef<mirror::Class> int_array_class(self, AllocClass(self, java_lang_Class.get(), sizeof(mirror::Class)));
+  Handle<mirror::Class> int_array_class(
+      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::Class))));
   int_array_class->SetComponentType(GetClassRoot(kPrimitiveInt));
-  mirror::IntArray::SetArrayClass(int_array_class.get());
-  SetClassRoot(kIntArrayClass, int_array_class.get());
+  mirror::IntArray::SetArrayClass(int_array_class.Get());
+  SetClassRoot(kIntArrayClass, int_array_class.Get());
 
   // now that these are registered, we can use AllocClass() and AllocObjectArray
 
   // Set up DexCache. This cannot be done later since AppendToBootClassPath calls AllocDexCache.
-  SirtRef<mirror::Class>
-      java_lang_DexCache(self, AllocClass(self, java_lang_Class.get(), sizeof(mirror::DexCacheClass)));
-  SetClassRoot(kJavaLangDexCache, java_lang_DexCache.get());
+  Handle<mirror::Class> java_lang_DexCache(
+      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::DexCacheClass))));
+  SetClassRoot(kJavaLangDexCache, java_lang_DexCache.Get());
   java_lang_DexCache->SetObjectSize(sizeof(mirror::DexCache));
   java_lang_DexCache->SetStatus(mirror::Class::kStatusResolved, self);
 
-  // Constructor, Field, Method, and AbstractMethod are necessary so that FindClass can link members.
-  SirtRef<mirror::Class> java_lang_reflect_ArtField(self, AllocClass(self, java_lang_Class.get(),
-                                                                     sizeof(mirror::ArtFieldClass)));
-  CHECK(java_lang_reflect_ArtField.get() != NULL);
+  // Constructor, Field, Method, and AbstractMethod are necessary so
+  // that FindClass can link members.
+  Handle<mirror::Class> java_lang_reflect_ArtField(
+      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::ArtFieldClass))));
+  CHECK(java_lang_reflect_ArtField.Get() != NULL);
   java_lang_reflect_ArtField->SetObjectSize(sizeof(mirror::ArtField));
-  SetClassRoot(kJavaLangReflectArtField, java_lang_reflect_ArtField.get());
+  SetClassRoot(kJavaLangReflectArtField, java_lang_reflect_ArtField.Get());
   java_lang_reflect_ArtField->SetStatus(mirror::Class::kStatusResolved, self);
-  mirror::ArtField::SetClass(java_lang_reflect_ArtField.get());
+  mirror::ArtField::SetClass(java_lang_reflect_ArtField.Get());
 
-  SirtRef<mirror::Class> java_lang_reflect_ArtMethod(self, AllocClass(self, java_lang_Class.get(),
-                                                                      sizeof(mirror::ArtMethodClass)));
-  CHECK(java_lang_reflect_ArtMethod.get() != NULL);
+  Handle<mirror::Class> java_lang_reflect_ArtMethod(
+      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::ArtMethodClass))));
+  CHECK(java_lang_reflect_ArtMethod.Get() != NULL);
   java_lang_reflect_ArtMethod->SetObjectSize(sizeof(mirror::ArtMethod));
-  SetClassRoot(kJavaLangReflectArtMethod, java_lang_reflect_ArtMethod.get());
+  SetClassRoot(kJavaLangReflectArtMethod, java_lang_reflect_ArtMethod.Get());
   java_lang_reflect_ArtMethod->SetStatus(mirror::Class::kStatusResolved, self);
 
-  mirror::ArtMethod::SetClass(java_lang_reflect_ArtMethod.get());
+  mirror::ArtMethod::SetClass(java_lang_reflect_ArtMethod.Get());
 
   // Set up array classes for string, field, method
-  SirtRef<mirror::Class> object_array_string(self, AllocClass(self, java_lang_Class.get(),
-                                                              sizeof(mirror::Class)));
-  object_array_string->SetComponentType(java_lang_String.get());
-  SetClassRoot(kJavaLangStringArrayClass, object_array_string.get());
+  Handle<mirror::Class> object_array_string(
+      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::Class))));
+  object_array_string->SetComponentType(java_lang_String.Get());
+  SetClassRoot(kJavaLangStringArrayClass, object_array_string.Get());
 
-  SirtRef<mirror::Class> object_array_art_method(self, AllocClass(self, java_lang_Class.get(),
-                                                                  sizeof(mirror::Class)));
-  object_array_art_method->SetComponentType(java_lang_reflect_ArtMethod.get());
-  SetClassRoot(kJavaLangReflectArtMethodArrayClass, object_array_art_method.get());
+  Handle<mirror::Class> object_array_art_method(
+      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::Class))));
+  object_array_art_method->SetComponentType(java_lang_reflect_ArtMethod.Get());
+  SetClassRoot(kJavaLangReflectArtMethodArrayClass, object_array_art_method.Get());
 
-  SirtRef<mirror::Class> object_array_art_field(self, AllocClass(self, java_lang_Class.get(),
-                                                                 sizeof(mirror::Class)));
-  object_array_art_field->SetComponentType(java_lang_reflect_ArtField.get());
-  SetClassRoot(kJavaLangReflectArtFieldArrayClass, object_array_art_field.get());
+  Handle<mirror::Class> object_array_art_field(
+      hs.NewHandle(AllocClass(self, java_lang_Class.Get(), sizeof(mirror::Class))));
+  object_array_art_field->SetComponentType(java_lang_reflect_ArtField.Get());
+  SetClassRoot(kJavaLangReflectArtFieldArrayClass, object_array_art_field.Get());
 
   // Setup boot_class_path_ and register class_path now that we can use AllocObjectArray to create
   // DexCache instances. Needs to be after String, Field, Method arrays since AllocDexCache uses
@@ -329,8 +341,8 @@
   // now we can use FindSystemClass
 
   // run char class through InitializePrimitiveClass to finish init
-  InitializePrimitiveClass(char_class.get(), Primitive::kPrimChar);
-  SetClassRoot(kPrimitiveChar, char_class.get());  // needs descriptor
+  InitializePrimitiveClass(char_class.Get(), Primitive::kPrimChar);
+  SetClassRoot(kPrimitiveChar, char_class.Get());  // needs descriptor
 
   // Create runtime resolution and imt conflict methods. Also setup the default imt.
   Runtime* runtime = Runtime::Current();
@@ -338,19 +350,23 @@
   runtime->SetImtConflictMethod(runtime->CreateImtConflictMethod());
   runtime->SetDefaultImt(runtime->CreateDefaultImt(this));
 
+  // Set up GenericJNI entrypoint. That is mainly a hack for common_compiler_test.h so that
+  // we do not need friend classes or a publicly exposed setter.
+  quick_generic_jni_trampoline_ = reinterpret_cast<void*>(art_quick_generic_jni_trampoline);
+
   // Object, String and DexCache need to be rerun through FindSystemClass to finish init
   java_lang_Object->SetStatus(mirror::Class::kStatusNotReady, self);
   mirror::Class* Object_class = FindSystemClass(self, "Ljava/lang/Object;");
-  CHECK_EQ(java_lang_Object.get(), Object_class);
+  CHECK_EQ(java_lang_Object.Get(), Object_class);
   CHECK_EQ(java_lang_Object->GetObjectSize(), sizeof(mirror::Object));
   java_lang_String->SetStatus(mirror::Class::kStatusNotReady, self);
   mirror::Class* String_class = FindSystemClass(self, "Ljava/lang/String;");
-  CHECK_EQ(java_lang_String.get(), String_class);
+  CHECK_EQ(java_lang_String.Get(), String_class);
   CHECK_EQ(java_lang_String->GetObjectSize(), sizeof(mirror::String));
   java_lang_DexCache->SetStatus(mirror::Class::kStatusNotReady, self);
   mirror::Class* DexCache_class = FindSystemClass(self, "Ljava/lang/DexCache;");
-  CHECK_EQ(java_lang_String.get(), String_class);
-  CHECK_EQ(java_lang_DexCache.get(), DexCache_class);
+  CHECK_EQ(java_lang_String.Get(), String_class);
+  CHECK_EQ(java_lang_DexCache.Get(), DexCache_class);
   CHECK_EQ(java_lang_DexCache->GetObjectSize(), sizeof(mirror::DexCache));
 
   // Setup the primitive array type classes - can't be done until Object has a vtable.
@@ -361,13 +377,13 @@
   mirror::ByteArray::SetArrayClass(GetClassRoot(kByteArrayClass));
 
   mirror::Class* found_char_array_class = FindSystemClass(self, "[C");
-  CHECK_EQ(char_array_class.get(), found_char_array_class);
+  CHECK_EQ(char_array_class.Get(), found_char_array_class);
 
   SetClassRoot(kShortArrayClass, FindSystemClass(self, "[S"));
   mirror::ShortArray::SetArrayClass(GetClassRoot(kShortArrayClass));
 
   mirror::Class* found_int_array_class = FindSystemClass(self, "[I");
-  CHECK_EQ(int_array_class.get(), found_int_array_class);
+  CHECK_EQ(int_array_class.Get(), found_int_array_class);
 
   SetClassRoot(kLongArrayClass, FindSystemClass(self, "[J"));
   mirror::LongArray::SetArrayClass(GetClassRoot(kLongArrayClass));
@@ -379,10 +395,10 @@
   mirror::DoubleArray::SetArrayClass(GetClassRoot(kDoubleArrayClass));
 
   mirror::Class* found_class_array_class = FindSystemClass(self, "[Ljava/lang/Class;");
-  CHECK_EQ(class_array_class.get(), found_class_array_class);
+  CHECK_EQ(class_array_class.Get(), found_class_array_class);
 
   mirror::Class* found_object_array_class = FindSystemClass(self, "[Ljava/lang/Object;");
-  CHECK_EQ(object_array_class.get(), found_object_array_class);
+  CHECK_EQ(object_array_class.Get(), found_object_array_class);
 
   // Setup the single, global copy of "iftable".
   mirror::Class* java_lang_Cloneable = FindSystemClass(self, "Ljava/lang/Cloneable;");
@@ -395,35 +411,34 @@
   array_iftable_->SetInterface(1, java_io_Serializable);
 
   // Sanity check Class[] and Object[]'s interfaces.
-  ClassHelper kh(class_array_class.get());
-  CHECK_EQ(java_lang_Cloneable, kh.GetDirectInterface(0));
-  CHECK_EQ(java_io_Serializable, kh.GetDirectInterface(1));
-  kh.ChangeClass(object_array_class.get());
-  CHECK_EQ(java_lang_Cloneable, kh.GetDirectInterface(0));
-  CHECK_EQ(java_io_Serializable, kh.GetDirectInterface(1));
+  CHECK_EQ(java_lang_Cloneable, mirror::Class::GetDirectInterface(self, class_array_class, 0));
+  CHECK_EQ(java_io_Serializable, mirror::Class::GetDirectInterface(self, class_array_class, 1));
+  CHECK_EQ(java_lang_Cloneable, mirror::Class::GetDirectInterface(self, object_array_class, 0));
+  CHECK_EQ(java_io_Serializable, mirror::Class::GetDirectInterface(self, object_array_class, 1));
   // Run Class, ArtField, and ArtMethod through FindSystemClass. This initializes their
   // dex_cache_ fields and register them in class_table_.
   mirror::Class* Class_class = FindSystemClass(self, "Ljava/lang/Class;");
-  CHECK_EQ(java_lang_Class.get(), Class_class);
+  CHECK_EQ(java_lang_Class.Get(), Class_class);
 
   java_lang_reflect_ArtMethod->SetStatus(mirror::Class::kStatusNotReady, self);
   mirror::Class* Art_method_class = FindSystemClass(self, "Ljava/lang/reflect/ArtMethod;");
-  CHECK_EQ(java_lang_reflect_ArtMethod.get(), Art_method_class);
+  CHECK_EQ(java_lang_reflect_ArtMethod.Get(), Art_method_class);
 
   java_lang_reflect_ArtField->SetStatus(mirror::Class::kStatusNotReady, self);
   mirror::Class* Art_field_class = FindSystemClass(self, "Ljava/lang/reflect/ArtField;");
-  CHECK_EQ(java_lang_reflect_ArtField.get(), Art_field_class);
+  CHECK_EQ(java_lang_reflect_ArtField.Get(), Art_field_class);
 
-  mirror::Class* String_array_class = FindSystemClass(self, class_roots_descriptors_[kJavaLangStringArrayClass]);
-  CHECK_EQ(object_array_string.get(), String_array_class);
+  mirror::Class* String_array_class =
+      FindSystemClass(self, class_roots_descriptors_[kJavaLangStringArrayClass]);
+  CHECK_EQ(object_array_string.Get(), String_array_class);
 
   mirror::Class* Art_method_array_class =
       FindSystemClass(self, class_roots_descriptors_[kJavaLangReflectArtMethodArrayClass]);
-  CHECK_EQ(object_array_art_method.get(), Art_method_array_class);
+  CHECK_EQ(object_array_art_method.Get(), Art_method_array_class);
 
   mirror::Class* Art_field_array_class =
       FindSystemClass(self, class_roots_descriptors_[kJavaLangReflectArtFieldArrayClass]);
-  CHECK_EQ(object_array_art_field.get(), Art_field_array_class);
+  CHECK_EQ(object_array_art_field.Get(), Art_field_array_class);
 
   // End of special init trickery, subsequent classes may be loaded via FindSystemClass.
 
@@ -434,18 +449,22 @@
   // java.lang.ref classes need to be specially flagged, but otherwise are normal classes
   mirror::Class* java_lang_ref_Reference = FindSystemClass(self, "Ljava/lang/ref/Reference;");
   SetClassRoot(kJavaLangRefReference, java_lang_ref_Reference);
-  mirror::Class* java_lang_ref_FinalizerReference = FindSystemClass(self, "Ljava/lang/ref/FinalizerReference;");
+  mirror::Class* java_lang_ref_FinalizerReference =
+      FindSystemClass(self, "Ljava/lang/ref/FinalizerReference;");
   java_lang_ref_FinalizerReference->SetAccessFlags(
       java_lang_ref_FinalizerReference->GetAccessFlags() |
           kAccClassIsReference | kAccClassIsFinalizerReference);
-  mirror::Class* java_lang_ref_PhantomReference = FindSystemClass(self, "Ljava/lang/ref/PhantomReference;");
+  mirror::Class* java_lang_ref_PhantomReference =
+      FindSystemClass(self, "Ljava/lang/ref/PhantomReference;");
   java_lang_ref_PhantomReference->SetAccessFlags(
       java_lang_ref_PhantomReference->GetAccessFlags() |
           kAccClassIsReference | kAccClassIsPhantomReference);
-  mirror::Class* java_lang_ref_SoftReference = FindSystemClass(self, "Ljava/lang/ref/SoftReference;");
+  mirror::Class* java_lang_ref_SoftReference =
+      FindSystemClass(self, "Ljava/lang/ref/SoftReference;");
   java_lang_ref_SoftReference->SetAccessFlags(
       java_lang_ref_SoftReference->GetAccessFlags() | kAccClassIsReference);
-  mirror::Class* java_lang_ref_WeakReference = FindSystemClass(self, "Ljava/lang/ref/WeakReference;");
+  mirror::Class* java_lang_ref_WeakReference =
+      FindSystemClass(self, "Ljava/lang/ref/WeakReference;");
   java_lang_ref_WeakReference->SetAccessFlags(
       java_lang_ref_WeakReference->GetAccessFlags() |
           kAccClassIsReference | kAccClassIsWeakReference);
@@ -459,9 +478,11 @@
   // java.lang.StackTraceElement as a convenience.
   SetClassRoot(kJavaLangThrowable, FindSystemClass(self, "Ljava/lang/Throwable;"));
   mirror::Throwable::SetClass(GetClassRoot(kJavaLangThrowable));
-  SetClassRoot(kJavaLangClassNotFoundException, FindSystemClass(self, "Ljava/lang/ClassNotFoundException;"));
+  SetClassRoot(kJavaLangClassNotFoundException,
+               FindSystemClass(self, "Ljava/lang/ClassNotFoundException;"));
   SetClassRoot(kJavaLangStackTraceElement, FindSystemClass(self, "Ljava/lang/StackTraceElement;"));
-  SetClassRoot(kJavaLangStackTraceElementArrayClass, FindSystemClass(self, "[Ljava/lang/StackTraceElement;"));
+  SetClassRoot(kJavaLangStackTraceElementArrayClass,
+               FindSystemClass(self, "[Ljava/lang/StackTraceElement;"));
   mirror::StackTraceElement::SetClass(GetClassRoot(kJavaLangStackTraceElement));
 
   FinishInit(self);
@@ -529,8 +550,9 @@
   for (size_t i = 0; i < ClassLinker::kClassRootsMax; ++i) {
     mirror::Class* c = GetClassRoot(ClassRoot(i));
     if (!c->IsArrayClass() && !c->IsPrimitive()) {
-      SirtRef<mirror::Class> sirt_class(self, GetClassRoot(ClassRoot(i)));
-      EnsureInitialized(sirt_class, true, true);
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> h_class(hs.NewHandle(GetClassRoot(ClassRoot(i))));
+      EnsureInitialized(h_class, true, true);
       self->AssertNoPendingException();
     }
   }
@@ -546,7 +568,7 @@
 
   gc::Heap* heap = Runtime::Current()->GetHeap();
   std::string boot_image_option("--boot-image=");
-  boot_image_option += heap->GetImageSpace()->GetImageFilename();
+  boot_image_option += heap->GetImageSpace()->GetImageLocation();
 
   std::string dex_file_option("--dex-file=");
   dex_file_option += dex_filename;
@@ -616,8 +638,8 @@
   return FindOpenedOatFileFromDexLocation(dex_location, &dex_location_checksum);
 }
 
-const OatFile* ClassLinker::FindOpenedOatFileFromDexLocation(const char* dex_location,
-                                                             const uint32_t* const dex_location_checksum) {
+const OatFile* ClassLinker::FindOpenedOatFileFromDexLocation(
+    const char* dex_location, const uint32_t* const dex_location_checksum) {
   ReaderMutexLock mu(Thread::Current(), dex_lock_);
   for (size_t i = 0; i < oat_files_.size(); i++) {
     const OatFile* oat_file = oat_files_[i];
@@ -636,7 +658,7 @@
                                                      uint32_t dex_location_checksum,
                                                      const char* oat_location,
                                                      std::string* error_msg) {
-  UniquePtr<OatFile> oat_file(OatFile::Open(oat_location, oat_location, NULL,
+  std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_location, oat_location, NULL,
                                             !Runtime::Current()->IsCompiler(),
                                             error_msg));
   if (oat_file.get() == nullptr) {
@@ -734,15 +756,16 @@
   }
 
  private:
-  UniquePtr<File> file_;
+  std::unique_ptr<File> file_;
 
   DISALLOW_COPY_AND_ASSIGN(ScopedFlock);
 };
 
-const DexFile* ClassLinker::FindOrCreateOatFileForDexLocation(const char* dex_location,
-                                                              uint32_t dex_location_checksum,
-                                                              const char* oat_location,
-                                                              std::vector<std::string>* error_msgs) {
+const DexFile* ClassLinker::FindOrCreateOatFileForDexLocation(
+    const char* dex_location,
+    uint32_t dex_location_checksum,
+    const char* oat_location,
+    std::vector<std::string>* error_msgs) {
   // We play a locking game here so that if two different processes
   // race to generate (or worse, one tries to open a partial generated
   // file) we will be okay. This is actually common with apps that use
@@ -773,7 +796,7 @@
     error_msgs->push_back(error_msg);
     return nullptr;
   }
-  UniquePtr<OatFile> oat_file(OatFile::Open(oat_location, oat_location, NULL,
+  std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_location, oat_location, NULL,
                                             !Runtime::Current()->IsCompiler(),
                                             &error_msg));
   if (oat_file.get() == nullptr) {
@@ -803,15 +826,32 @@
 bool ClassLinker::VerifyOatFileChecksums(const OatFile* oat_file,
                                          const char* dex_location,
                                          uint32_t dex_location_checksum,
+                                         const InstructionSet instruction_set,
                                          std::string* error_msg) {
   Runtime* runtime = Runtime::Current();
-  const ImageHeader& image_header = runtime->GetHeap()->GetImageSpace()->GetImageHeader();
-  uint32_t image_oat_checksum = image_header.GetOatChecksum();
-  uintptr_t image_oat_data_begin = reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin());
-  bool image_check = ((oat_file->GetOatHeader().GetImageFileLocationOatChecksum() == image_oat_checksum)
-                      && (oat_file->GetOatHeader().GetImageFileLocationOatDataBegin() == image_oat_data_begin));
+  const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetImageSpace();
 
-  const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_location, &dex_location_checksum);
+  // If the requested instruction set is the same as the current runtime,
+  // we can use the checksums directly. If it isn't, we'll have to read the
+  // image header from the image for the right instruction set.
+  uint32_t image_oat_checksum = 0;
+  uintptr_t image_oat_data_begin = 0;
+  if (instruction_set == kRuntimeISA) {
+    const ImageHeader& image_header = image_space->GetImageHeader();
+    image_oat_checksum = image_header.GetOatChecksum();
+    image_oat_data_begin = reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin());
+  } else {
+    std::unique_ptr<ImageHeader> image_header(gc::space::ImageSpace::ReadImageHeaderOrDie(
+        image_space->GetImageLocation().c_str(), instruction_set));
+    image_oat_checksum = image_header->GetOatChecksum();
+    image_oat_data_begin = reinterpret_cast<uintptr_t>(image_header->GetOatDataBegin());
+  }
+  const OatHeader& oat_header = oat_file->GetOatHeader();
+  bool image_check = ((oat_header.GetImageFileLocationOatChecksum() == image_oat_checksum)
+                      && (oat_header.GetImageFileLocationOatDataBegin() == image_oat_data_begin));
+
+  const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_location,
+                                                                    &dex_location_checksum);
   if (oat_dex_file == NULL) {
     *error_msg = StringPrintf("oat file '%s' does not contain contents for '%s' with checksum 0x%x",
                               oat_file->GetLocation().c_str(), dex_location, dex_location_checksum);
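
    Note: with the extra InstructionSet parameter, the comparison values come from the already-mapped
    image header only when the oat file targets the running ISA; otherwise the header is re-read from
    the per-ISA image file. A small sketch of that dispatch, assuming hypothetical helper names and
    dummy checksums (neither function below is an ART API):

    #include <cstdint>

    enum class Isa { kArm, kArm64, kX86, kX86_64 };
    constexpr Isa kRuntimeIsa = Isa::kArm64;  // Stands in for kRuntimeISA.

    // Cheap path: the header of the image the runtime booted from is already mapped.
    uint32_t MappedImageOatChecksum() { return 0x1234u; }

    // Expensive path: open and parse the image file built for a different ISA.
    uint32_t ReadImageOatChecksumFor(Isa isa) { return (isa == Isa::kArm) ? 0x5678u : 0x9abcu; }

    uint32_t ImageOatChecksumFor(Isa isa) {
      return (isa == kRuntimeIsa) ? MappedImageOatChecksum() : ReadImageOatChecksumFor(isa);
    }
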
@@ -851,7 +891,7 @@
                                                             const char* dex_location,
                                                             std::string* error_msg,
                                                             bool* open_failed) {
-  UniquePtr<const OatFile> oat_file(FindOatFileFromOatLocation(oat_file_location, error_msg));
+  std::unique_ptr<const OatFile> oat_file(FindOatFileFromOatLocation(oat_file_location, error_msg));
   if (oat_file.get() == nullptr) {
     *open_failed = true;
     return nullptr;
@@ -873,7 +913,7 @@
     dex_file = oat_dex_file->OpenDexFile(error_msg);
   } else {
     bool verified = VerifyOatFileChecksums(oat_file.get(), dex_location, dex_location_checksum,
-                                           error_msg);
+                                           kRuntimeISA, error_msg);
     if (!verified) {
       return nullptr;
     }
@@ -886,9 +926,11 @@
   return dex_file;
 }
 
-const DexFile* ClassLinker::FindDexFileInOatFileFromDexLocation(const char* dex_location,
-                                                                const uint32_t* const dex_location_checksum,
-                                                                std::vector<std::string>* error_msgs) {
+const DexFile* ClassLinker::FindDexFileInOatFileFromDexLocation(
+    const char* dex_location,
+    const uint32_t* const dex_location_checksum,
+    InstructionSet isa,
+    std::vector<std::string>* error_msgs) {
   const OatFile* open_oat_file = FindOpenedOatFileFromDexLocation(dex_location,
                                                                   dex_location_checksum);
   if (open_oat_file != nullptr) {
@@ -903,8 +945,8 @@
   }
 
   // Look for an existing file next to dex. for example, for
-  // /foo/bar/baz.jar, look for /foo/bar/baz.odex.
-  std::string odex_filename(OatFile::DexFilenameToOdexFilename(dex_location));
+  // /foo/bar/baz.jar, look for /foo/bar/<isa>/baz.odex.
+  std::string odex_filename(DexFilenameToOdexFilename(dex_location, isa));
   bool open_failed;
   std::string error_msg;
   const DexFile* dex_file = VerifyAndOpenDexFileFromOatFile(odex_filename, dex_location,
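
    Note: per the comment above, the odex lookup is now ISA-qualified, so /foo/bar/baz.jar maps to
    /foo/bar/<isa>/baz.odex. An illustrative reconstruction of that path rule; the helper name and the
    exact string handling are assumptions, not the real DexFilenameToOdexFilename:

    #include <cassert>
    #include <string>

    std::string OdexFilenameFor(const std::string& dex_location, const std::string& isa) {
      size_t dir_end = dex_location.rfind('/');
      size_t ext = dex_location.rfind('.');
      assert(ext != std::string::npos && (dir_end == std::string::npos || ext > dir_end));
      std::string dir = (dir_end == std::string::npos) ? "" : dex_location.substr(0, dir_end + 1);
      std::string base = dex_location.substr(dir.size(), ext - dir.size());
      return dir + isa + "/" + base + ".odex";
    }

    // Example: OdexFilenameFor("/foo/bar/baz.jar", "arm") == "/foo/bar/arm/baz.odex"
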
@@ -1011,10 +1053,11 @@
   mirror::ObjectArray<mirror::DexCache>* dex_caches =
       dex_caches_object->AsObjectArray<mirror::DexCache>();
 
-  SirtRef<mirror::ObjectArray<mirror::Class> > class_roots(
-      self,
-      space->GetImageHeader().GetImageRoot(ImageHeader::kClassRoots)->AsObjectArray<mirror::Class>());
-  class_roots_ = class_roots.get();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::ObjectArray<mirror::Class>> class_roots(hs.NewHandle(
+          space->GetImageHeader().GetImageRoot(ImageHeader::kClassRoots)->
+          AsObjectArray<mirror::Class>()));
+  class_roots_ = class_roots.Get();
 
   // Special case of setting up the String class early so that we can test arbitrary objects
   // as being Strings or not
@@ -1023,7 +1066,8 @@
   CHECK_EQ(oat_file.GetOatHeader().GetDexFileCount(),
            static_cast<uint32_t>(dex_caches->GetLength()));
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
-    SirtRef<mirror::DexCache> dex_cache(self, dex_caches->Get(i));
+    StackHandleScope<1> hs(self);
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(dex_caches->Get(i)));
     const std::string& dex_file_location(dex_cache->GetLocation()->ToModifiedUtf8());
     const OatFile::OatDexFile* oat_dex_file = oat_file.GetOatDexFile(dex_file_location.c_str(),
                                                                      nullptr);
@@ -1053,7 +1097,7 @@
 
   // reinit class_roots_
   mirror::Class::SetClassClass(class_roots->Get(kJavaLangClass));
-  class_roots_ = class_roots.get();
+  class_roots_ = class_roots.Get();
 
   // reinit array_iftable_ from any array class instance, they should be ==
   array_iftable_ = GetClassRoot(kObjectArrayClass)->GetIfTable();
@@ -1196,42 +1240,43 @@
 
 mirror::DexCache* ClassLinker::AllocDexCache(Thread* self, const DexFile& dex_file) {
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  SirtRef<mirror::Class> dex_cache_class(self, GetClassRoot(kJavaLangDexCache));
-  SirtRef<mirror::DexCache> dex_cache(
-      self, down_cast<mirror::DexCache*>(
-          heap->AllocObject<true>(self, dex_cache_class.get(), dex_cache_class->GetObjectSize(),
-                                  VoidFunctor())));
-  if (dex_cache.get() == NULL) {
+  StackHandleScope<16> hs(self);
+  Handle<mirror::Class> dex_cache_class(hs.NewHandle(GetClassRoot(kJavaLangDexCache)));
+  Handle<mirror::DexCache> dex_cache(
+      hs.NewHandle(down_cast<mirror::DexCache*>(
+          heap->AllocObject<true>(self, dex_cache_class.Get(), dex_cache_class->GetObjectSize(),
+                                  VoidFunctor()))));
+  if (dex_cache.Get() == NULL) {
     return NULL;
   }
-  SirtRef<mirror::String>
-      location(self, intern_table_->InternStrong(dex_file.GetLocation().c_str()));
-  if (location.get() == NULL) {
+  Handle<mirror::String>
+      location(hs.NewHandle(intern_table_->InternStrong(dex_file.GetLocation().c_str())));
+  if (location.Get() == NULL) {
     return NULL;
   }
-  SirtRef<mirror::ObjectArray<mirror::String> >
-      strings(self, AllocStringArray(self, dex_file.NumStringIds()));
-  if (strings.get() == NULL) {
+  Handle<mirror::ObjectArray<mirror::String>>
+      strings(hs.NewHandle(AllocStringArray(self, dex_file.NumStringIds())));
+  if (strings.Get() == NULL) {
     return NULL;
   }
-  SirtRef<mirror::ObjectArray<mirror::Class> >
-      types(self, AllocClassArray(self, dex_file.NumTypeIds()));
-  if (types.get() == NULL) {
+  Handle<mirror::ObjectArray<mirror::Class>>
+      types(hs.NewHandle(AllocClassArray(self, dex_file.NumTypeIds())));
+  if (types.Get() == NULL) {
     return NULL;
   }
-  SirtRef<mirror::ObjectArray<mirror::ArtMethod> >
-      methods(self, AllocArtMethodArray(self, dex_file.NumMethodIds()));
-  if (methods.get() == NULL) {
+  Handle<mirror::ObjectArray<mirror::ArtMethod>>
+      methods(hs.NewHandle(AllocArtMethodArray(self, dex_file.NumMethodIds())));
+  if (methods.Get() == NULL) {
     return NULL;
   }
-  SirtRef<mirror::ObjectArray<mirror::ArtField> >
-      fields(self, AllocArtFieldArray(self, dex_file.NumFieldIds()));
-  if (fields.get() == NULL) {
+  Handle<mirror::ObjectArray<mirror::ArtField>>
+      fields(hs.NewHandle(AllocArtFieldArray(self, dex_file.NumFieldIds())));
+  if (fields.Get() == NULL) {
     return NULL;
   }
-  dex_cache->Init(&dex_file, location.get(), strings.get(), types.get(), methods.get(),
-                  fields.get());
-  return dex_cache.get();
+  dex_cache->Init(&dex_file, location.Get(), strings.Get(), types.Get(), methods.Get(),
+                  fields.Get());
+  return dex_cache.Get();
 }
 
 // Used to initialize a class in the allocation code path to ensure it is guarded by a StoreStore
@@ -1264,9 +1309,9 @@
   DCHECK_GE(class_size, sizeof(mirror::Class));
   gc::Heap* heap = Runtime::Current()->GetHeap();
   InitializeClassVisitor visitor(class_size);
-  mirror::Object* k =
-      kMovingClasses ? heap->AllocObject<true>(self, java_lang_Class, class_size, visitor)
-                     : heap->AllocNonMovableObject<true>(self, java_lang_Class, class_size, visitor);
+  mirror::Object* k = (kMovingClasses) ?
+      heap->AllocObject<true>(self, java_lang_Class, class_size, visitor) :
+      heap->AllocNonMovableObject<true>(self, java_lang_Class, class_size, visitor);
   if (UNLIKELY(k == nullptr)) {
     CHECK(self->IsExceptionPending());  // OOME.
     return nullptr;
@@ -1299,19 +1344,19 @@
   DCHECK(klass != NULL);
   // Wait for the class if it has not already been linked.
   if (!klass->IsResolved() && !klass->IsErroneous()) {
-    SirtRef<mirror::Class> sirt_class(self, klass);
-    ObjectLock<mirror::Class> lock(self, &sirt_class);
+    StackHandleScope<1> hs(self);
+    HandleWrapper<mirror::Class> h_class(hs.NewHandleWrapper(&klass));
+    ObjectLock<mirror::Class> lock(self, h_class);
     // Check for circular dependencies between classes.
-    if (!sirt_class->IsResolved() && sirt_class->GetClinitThreadId() == self->GetTid()) {
-      ThrowClassCircularityError(sirt_class.get());
-      sirt_class->SetStatus(mirror::Class::kStatusError, self);
+    if (!h_class->IsResolved() && h_class->GetClinitThreadId() == self->GetTid()) {
+      ThrowClassCircularityError(h_class.Get());
+      h_class->SetStatus(mirror::Class::kStatusError, self);
       return nullptr;
     }
     // Wait for the pending initialization to complete.
-    while (!sirt_class->IsResolved() && !sirt_class->IsErroneous()) {
+    while (!h_class->IsResolved() && !h_class->IsErroneous()) {
       lock.WaitIgnoringInterrupts();
     }
-    klass = sirt_class.get();
   }
   if (klass->IsErroneous()) {
     ThrowEarlierClassFailure(klass);
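
    Note: NewHandleWrapper(&klass) is what makes the old trailing "klass = sirt_class.get()" copy-back
    unnecessary: the wrapper shadows the caller's raw pointer and writes the current value back when it
    leaves scope, so any relocation that happens while the lock is waited on shows up in klass
    automatically. A toy model of that write-back behaviour (not the ART HandleWrapper):

    template <typename T>
    class WrapbackHandle {
     public:
      explicit WrapbackHandle(T** addr) : addr_(addr), obj_(*addr) {}
      ~WrapbackHandle() { *addr_ = obj_; }   // Copy-back happens automatically here.
      T* Get() const { return obj_; }
      void Assign(T* obj) { obj_ = obj; }    // e.g. updated by a moving collector.
     private:
      T** addr_;
      T* obj_;
    };
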
@@ -1324,7 +1369,7 @@
 }
 
 mirror::Class* ClassLinker::FindClass(Thread* self, const char* descriptor,
-                                      const SirtRef<mirror::ClassLoader>& class_loader) {
+                                      Handle<mirror::ClassLoader> class_loader) {
   DCHECK_NE(*descriptor, '\0') << "descriptor is empty string";
   DCHECK(self != nullptr);
   self->AssertNoPendingException();
@@ -1334,18 +1379,18 @@
     return FindPrimitiveClass(descriptor[0]);
   }
   // Find the class in the loaded classes table.
-  mirror::Class* klass = LookupClass(descriptor, class_loader.get());
+  mirror::Class* klass = LookupClass(descriptor, class_loader.Get());
   if (klass != NULL) {
     return EnsureResolved(self, klass);
   }
   // Class is not yet loaded.
   if (descriptor[0] == '[') {
     return CreateArrayClass(self, descriptor, class_loader);
-  } else if (class_loader.get() == nullptr) {
+  } else if (class_loader.Get() == nullptr) {
     DexFile::ClassPathEntry pair = DexFile::FindInClassPath(descriptor, boot_class_path_);
     if (pair.second != NULL) {
-      SirtRef<mirror::ClassLoader> class_loader(self, nullptr);
-      return DefineClass(descriptor, class_loader, *pair.first, *pair.second);
+      StackHandleScope<1> hs(self);
+      return DefineClass(descriptor, NullHandle<mirror::ClassLoader>(), *pair.first, *pair.second);
     }
   } else if (Runtime::Current()->UseCompileTimeClassPath()) {
     // First try the boot class path, we check the descriptor first to avoid an unnecessary
@@ -1360,7 +1405,7 @@
     {
       ScopedObjectAccessUnchecked soa(self);
       ScopedLocalRef<jobject> jclass_loader(soa.Env(),
-                                            soa.AddLocalReference<jobject>(class_loader.get()));
+                                            soa.AddLocalReference<jobject>(class_loader.Get()));
       class_path = &Runtime::Current()->GetCompileTimeClassPath(jclass_loader.get());
     }
 
@@ -1372,7 +1417,7 @@
   } else {
     ScopedObjectAccessUnchecked soa(self);
     ScopedLocalRef<jobject> class_loader_object(soa.Env(),
-                                                soa.AddLocalReference<jobject>(class_loader.get()));
+                                                soa.AddLocalReference<jobject>(class_loader.Get()));
     std::string class_name_string(DescriptorToDot(descriptor));
     ScopedLocalRef<jobject> result(soa.Env(), NULL);
     {
@@ -1406,47 +1451,48 @@
 }
 
 mirror::Class* ClassLinker::DefineClass(const char* descriptor,
-                                        const SirtRef<mirror::ClassLoader>& class_loader,
+                                        Handle<mirror::ClassLoader> class_loader,
                                         const DexFile& dex_file,
                                         const DexFile::ClassDef& dex_class_def) {
   Thread* self = Thread::Current();
-  SirtRef<mirror::Class> klass(self, NULL);
+  StackHandleScope<2> hs(self);
+  auto klass = hs.NewHandle<mirror::Class>(nullptr);
   // Load the class from the dex file.
   if (UNLIKELY(!init_done_)) {
     // finish up init of hand crafted class_roots_
     if (strcmp(descriptor, "Ljava/lang/Object;") == 0) {
-      klass.reset(GetClassRoot(kJavaLangObject));
+      klass.Assign(GetClassRoot(kJavaLangObject));
     } else if (strcmp(descriptor, "Ljava/lang/Class;") == 0) {
-      klass.reset(GetClassRoot(kJavaLangClass));
+      klass.Assign(GetClassRoot(kJavaLangClass));
     } else if (strcmp(descriptor, "Ljava/lang/String;") == 0) {
-      klass.reset(GetClassRoot(kJavaLangString));
+      klass.Assign(GetClassRoot(kJavaLangString));
     } else if (strcmp(descriptor, "Ljava/lang/DexCache;") == 0) {
-      klass.reset(GetClassRoot(kJavaLangDexCache));
+      klass.Assign(GetClassRoot(kJavaLangDexCache));
     } else if (strcmp(descriptor, "Ljava/lang/reflect/ArtField;") == 0) {
-      klass.reset(GetClassRoot(kJavaLangReflectArtField));
+      klass.Assign(GetClassRoot(kJavaLangReflectArtField));
     } else if (strcmp(descriptor, "Ljava/lang/reflect/ArtMethod;") == 0) {
-      klass.reset(GetClassRoot(kJavaLangReflectArtMethod));
+      klass.Assign(GetClassRoot(kJavaLangReflectArtMethod));
     } else {
-      klass.reset(AllocClass(self, SizeOfClass(dex_file, dex_class_def)));
+      klass.Assign(AllocClass(self, SizeOfClass(dex_file, dex_class_def)));
     }
   } else {
-    klass.reset(AllocClass(self, SizeOfClass(dex_file, dex_class_def)));
+    klass.Assign(AllocClass(self, SizeOfClass(dex_file, dex_class_def)));
   }
-  if (UNLIKELY(klass.get() == NULL)) {
+  if (UNLIKELY(klass.Get() == NULL)) {
     CHECK(self->IsExceptionPending());  // Expect an OOME.
     return NULL;
   }
   klass->SetDexCache(FindDexCache(dex_file));
-  LoadClass(dex_file, dex_class_def, klass, class_loader.get());
+  LoadClass(dex_file, dex_class_def, klass, class_loader.Get());
   // Check for a pending exception during load
   if (self->IsExceptionPending()) {
     klass->SetStatus(mirror::Class::kStatusError, self);
     return NULL;
   }
-  ObjectLock<mirror::Class> lock(self, &klass);
+  ObjectLock<mirror::Class> lock(self, klass);
   klass->SetClinitThreadId(self->GetTid());
   // Add the newly loaded class to the loaded classes table.
-  mirror::Class* existing = InsertClass(descriptor, klass.get(), Hash(descriptor));
+  mirror::Class* existing = InsertClass(descriptor, klass.Get(), Hash(descriptor));
   if (existing != NULL) {
     // We failed to insert because we raced with another thread. Calling EnsureResolved may cause
     // this thread to block.
@@ -1463,7 +1509,7 @@
   // Link the class (if necessary)
   CHECK(!klass->IsResolved());
   // TODO: Use fast jobjects?
-  SirtRef<mirror::ObjectArray<mirror::Class> > interfaces(self, nullptr);
+  auto interfaces = hs.NewHandle<mirror::ObjectArray<mirror::Class>>(nullptr);
   if (!LinkClass(self, klass, interfaces)) {
     // Linking failed.
     klass->SetStatus(mirror::Class::kStatusError, self);
@@ -1482,9 +1528,9 @@
    * The class has been prepared and resolved but possibly not yet verified
    * at this point.
    */
-  Dbg::PostClassPrepare(klass.get());
+  Dbg::PostClassPrepare(klass.Get());
 
-  return klass.get();
+  return klass.Get();
 }
 
 // Precomputes size that will be needed for Class, matching LinkStaticFields
@@ -1666,8 +1712,9 @@
 }
 
 // Returns true if the method must run with interpreter, false otherwise.
-static bool NeedsInterpreter(mirror::ArtMethod* method, const void* quick_code,
-                             const void* portable_code) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+static bool NeedsInterpreter(
+    mirror::ArtMethod* method, const void* quick_code, const void* portable_code)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if ((quick_code == nullptr) && (portable_code == nullptr)) {
     // No code: need interpreter.
     // May return true for native code, in the case of generic JNI
@@ -1696,9 +1743,8 @@
   if (!runtime->IsStarted() || runtime->UseCompileTimeClassPath()) {
     return;  // OAT file unavailable.
   }
-  ClassHelper kh(klass);
-  const DexFile& dex_file = kh.GetDexFile();
-  const DexFile::ClassDef* dex_class_def = kh.GetClassDef();
+  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile::ClassDef* dex_class_def = klass->GetClassDef();
   CHECK(dex_class_def != nullptr);
   const byte* class_data = dex_file.GetClassData(*dex_class_def);
   // There should always be class data if there were direct methods.
@@ -1749,20 +1795,20 @@
   // Ignore virtual methods on the iterator.
 }
 
-static void LinkCode(const SirtRef<mirror::ArtMethod>& method, const OatFile::OatClass* oat_class,
-                     const DexFile& dex_file, uint32_t dex_method_index, uint32_t method_index)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+void ClassLinker::LinkCode(Handle<mirror::ArtMethod> method, const OatFile::OatClass* oat_class,
+                           const DexFile& dex_file, uint32_t dex_method_index,
+                           uint32_t method_index) {
   // Method shouldn't have already been linked.
   DCHECK(method->GetEntryPointFromQuickCompiledCode() == nullptr);
   DCHECK(method->GetEntryPointFromPortableCompiledCode() == nullptr);
   // Every kind of method should at least get an invoke stub from the oat_method.
   // non-abstract methods also get their code pointers.
   const OatFile::OatMethod oat_method = oat_class->GetOatMethod(method_index);
-  oat_method.LinkMethod(method.get());
+  oat_method.LinkMethod(method.Get());
 
   // Install entry point from interpreter.
   Runtime* runtime = Runtime::Current();
-  bool enter_interpreter = NeedsInterpreter(method.get(),
+  bool enter_interpreter = NeedsInterpreter(method.Get(),
                                             method->GetEntryPointFromQuickCompiledCode(),
                                             method->GetEntryPointFromPortableCompiledCode());
   if (enter_interpreter && !method->IsNative()) {
@@ -1782,8 +1828,8 @@
     // For static methods excluding the class initializer, install the trampoline.
     // It will be replaced by the proper entry point by ClassLinker::FixupStaticTrampolines
     // after initializing class (see ClassLinker::InitializeClass method).
-    method->SetEntryPointFromQuickCompiledCode(GetQuickResolutionTrampoline(runtime->GetClassLinker()));
-    method->SetEntryPointFromPortableCompiledCode(GetPortableResolutionTrampoline(runtime->GetClassLinker()));
+    method->SetEntryPointFromQuickCompiledCode(GetQuickResolutionTrampoline());
+    method->SetEntryPointFromPortableCompiledCode(GetPortableResolutionTrampoline());
   } else if (enter_interpreter) {
     if (!method->IsNative()) {
       // Set entry point from compiled code if there's no code or in interpreter only mode.
@@ -1809,35 +1855,13 @@
     if (enter_interpreter) {
       // We have a native method here without code. Then it should have either the GenericJni
       // trampoline as entrypoint (non-static), or the Resolution trampoline (static).
-      DCHECK(method->GetEntryPointFromQuickCompiledCode() ==
-          GetQuickResolutionTrampoline(runtime->GetClassLinker())
+      DCHECK(method->GetEntryPointFromQuickCompiledCode() == GetQuickResolutionTrampoline()
           || method->GetEntryPointFromQuickCompiledCode() == GetQuickGenericJniTrampoline());
-
-      DCHECK_EQ(method->GetFrameSizeInBytes<false>(), 0U);
-
-      // Fix up method metadata if necessary.
-      uint32_t s_len;
-      const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(dex_method_index), &s_len);
-      uint32_t refs = 1;    // Native method always has "this" or class.
-      for (uint32_t i = 1; i < s_len; ++i) {
-        if (shorty[i] == 'L') {
-          refs++;
-        }
-      }
-      size_t sirt_size = StackIndirectReferenceTable::GetAlignedSirtSize(refs);
-
-      // Get the generic spill masks and base frame size.
-      mirror::ArtMethod* callee_save_method =
-          Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
-
-      method->SetFrameSizeInBytes(callee_save_method->GetFrameSizeInBytes() + sirt_size);
-      method->SetCoreSpillMask(callee_save_method->GetCoreSpillMask());
-      method->SetFpSpillMask(callee_save_method->GetFpSpillMask());
     }
   }
 
   // Allow instrumentation its chance to hijack code.
-  runtime->GetInstrumentation()->UpdateMethodsCode(method.get(),
+  runtime->GetInstrumentation()->UpdateMethodsCode(method.Get(),
                                                    method->GetEntryPointFromQuickCompiledCode(),
                                                    method->GetEntryPointFromPortableCompiledCode(),
                                                    have_portable_code);
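
    Note: the block deleted above precomputed generic-JNI frame metadata by counting managed references
    in the method shorty, one implicit reference for "this" (or the class for static natives) plus one
    per 'L' argument, and then adding an aligned reference-table size onto the RefsAndArgs callee-save
    frame. The counting rule survives here as a standalone restatement; the pointer-size and alignment
    constants and the size formula are illustrative assumptions, only the reference count comes from the
    removed code.

    #include <cstddef>
    #include <cstdint>

    constexpr size_t kPointerSize = 8;
    constexpr size_t kAlignment = 8;

    size_t CountMethodReferences(const char* shorty) {
      uint32_t refs = 1;  // Native method always has "this" or its class.
      for (size_t i = 1; shorty[i] != '\0'; ++i) {  // shorty[0] is the return type.
        if (shorty[i] == 'L') {
          ++refs;
        }
      }
      return refs;
    }

    size_t AlignedReferenceTableSize(size_t refs) {
      size_t raw = refs * kPointerSize;
      return (raw + kAlignment - 1) & ~(kAlignment - 1);  // Round up to kAlignment.
    }

    // Example: shorty "VLIL" (void f(Object, int, Object)) yields 1 + 2 = 3 references.
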
@@ -1845,9 +1869,9 @@
 
 void ClassLinker::LoadClass(const DexFile& dex_file,
                             const DexFile::ClassDef& dex_class_def,
-                            const SirtRef<mirror::Class>& klass,
+                            Handle<mirror::Class> klass,
                             mirror::ClassLoader* class_loader) {
-  CHECK(klass.get() != NULL);
+  CHECK(klass.Get() != NULL);
   CHECK(klass->GetDexCache() != NULL);
   CHECK_EQ(mirror::Class::kStatusNotReady, klass->GetStatus());
   const char* descriptor = dex_file.GetClassDescriptor(dex_class_def);
@@ -1883,7 +1907,7 @@
 
 void ClassLinker::LoadClassMembers(const DexFile& dex_file,
                                    const byte* class_data,
-                                   const SirtRef<mirror::Class>& klass,
+                                   Handle<mirror::Class> klass,
                                    mirror::ClassLoader* class_loader,
                                    const OatFile::OatClass* oat_class) {
   // Load fields.
@@ -1907,21 +1931,23 @@
     klass->SetIFields(fields);
   }
   for (size_t i = 0; it.HasNextStaticField(); i++, it.Next()) {
-    SirtRef<mirror::ArtField> sfield(self, AllocArtField(self));
-    if (UNLIKELY(sfield.get() == NULL)) {
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ArtField> sfield(hs.NewHandle(AllocArtField(self)));
+    if (UNLIKELY(sfield.Get() == NULL)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return;
     }
-    klass->SetStaticField(i, sfield.get());
+    klass->SetStaticField(i, sfield.Get());
     LoadField(dex_file, it, klass, sfield);
   }
   for (size_t i = 0; it.HasNextInstanceField(); i++, it.Next()) {
-    SirtRef<mirror::ArtField> ifield(self, AllocArtField(self));
-    if (UNLIKELY(ifield.get() == NULL)) {
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ArtField> ifield(hs.NewHandle(AllocArtField(self)));
+    if (UNLIKELY(ifield.Get() == NULL)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return;
     }
-    klass->SetInstanceField(i, ifield.get());
+    klass->SetInstanceField(i, ifield.Get());
     LoadField(dex_file, it, klass, ifield);
   }
 
@@ -1948,12 +1974,13 @@
   }
   size_t class_def_method_index = 0;
   for (size_t i = 0; it.HasNextDirectMethod(); i++, it.Next()) {
-    SirtRef<mirror::ArtMethod> method(self, LoadMethod(self, dex_file, it, klass));
-    if (UNLIKELY(method.get() == NULL)) {
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ArtMethod> method(hs.NewHandle(LoadMethod(self, dex_file, it, klass)));
+    if (UNLIKELY(method.Get() == NULL)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return;
     }
-    klass->SetDirectMethod(i, method.get());
+    klass->SetDirectMethod(i, method.Get());
     if (oat_class != nullptr) {
       LinkCode(method, oat_class, dex_file, it.GetMemberIndex(), class_def_method_index);
     }
@@ -1961,12 +1988,13 @@
     class_def_method_index++;
   }
   for (size_t i = 0; it.HasNextVirtualMethod(); i++, it.Next()) {
-    SirtRef<mirror::ArtMethod> method(self, LoadMethod(self, dex_file, it, klass));
-    if (UNLIKELY(method.get() == NULL)) {
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ArtMethod> method(hs.NewHandle(LoadMethod(self, dex_file, it, klass)));
+    if (UNLIKELY(method.Get() == NULL)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return;
     }
-    klass->SetVirtualMethod(i, method.get());
+    klass->SetVirtualMethod(i, method.Get());
     DCHECK_EQ(class_def_method_index, it.NumDirectMethods() + i);
     if (oat_class != nullptr) {
       LinkCode(method, oat_class, dex_file, it.GetMemberIndex(), class_def_method_index);
@@ -1977,17 +2005,16 @@
 }
 
 void ClassLinker::LoadField(const DexFile& /*dex_file*/, const ClassDataItemIterator& it,
-                            const SirtRef<mirror::Class>& klass,
-                            const SirtRef<mirror::ArtField>& dst) {
+                            Handle<mirror::Class> klass, Handle<mirror::ArtField> dst) {
   uint32_t field_idx = it.GetMemberIndex();
   dst->SetDexFieldIndex(field_idx);
-  dst->SetDeclaringClass(klass.get());
+  dst->SetDeclaringClass(klass.Get());
   dst->SetAccessFlags(it.GetMemberAccessFlags());
 }
 
 mirror::ArtMethod* ClassLinker::LoadMethod(Thread* self, const DexFile& dex_file,
                                            const ClassDataItemIterator& it,
-                                           const SirtRef<mirror::Class>& klass) {
+                                           Handle<mirror::Class> klass) {
   uint32_t dex_method_idx = it.GetMemberIndex();
   const DexFile::MethodId& method_id = dex_file.GetMethodId(dex_method_idx);
   const char* method_name = dex_file.StringDataByIdx(method_id.name_idx_);
@@ -2001,7 +2028,7 @@
 
   const char* old_cause = self->StartAssertNoThreadSuspension("LoadMethod");
   dst->SetDexMethodIndex(dex_method_idx);
-  dst->SetDeclaringClass(klass.get());
+  dst->SetDeclaringClass(klass.Get());
   dst->SetCodeItemOffset(it.GetMethodCodeItemOffset());
 
   dst->SetDexCacheStrings(klass->GetDexCache()->GetStrings());
@@ -2017,15 +2044,14 @@
       if (klass->GetClassLoader() != NULL) {  // All non-boot finalizer methods are flagged
         klass->SetFinalizable();
       } else {
-        ClassHelper kh(klass.get());
-        const char* klass_descriptor = kh.GetDescriptor();
+        std::string klass_descriptor = klass->GetDescriptor();
         // The Enum class declares a "final" finalize() method to prevent subclasses from
         // introducing a finalizer. We don't want to set the finalizable flag for Enum or its
         // subclasses, so we exclude it here.
         // We also want to avoid setting the flag on Object, where we know that finalize() is
         // empty.
-        if ((strcmp("Ljava/lang/Object;", klass_descriptor) != 0) &&
-            (strcmp("Ljava/lang/Enum;", klass_descriptor) != 0)) {
+        if (klass_descriptor.compare("Ljava/lang/Object;") != 0 &&
+            klass_descriptor.compare("Ljava/lang/Enum;") != 0) {
           klass->SetFinalizable();
         }
       }
@@ -2039,7 +2065,7 @@
     } else {
       if (UNLIKELY((access_flags & kAccConstructor) == 0)) {
         LOG(WARNING) << method_name << " didn't have expected constructor access flag in class "
-            << PrettyDescriptor(klass.get()) << " in dex file " << dex_file.GetLocation();
+            << PrettyDescriptor(klass.Get()) << " in dex file " << dex_file.GetLocation();
         access_flags |= kAccConstructor;
       }
     }
@@ -2052,14 +2078,15 @@
 
 void ClassLinker::AppendToBootClassPath(const DexFile& dex_file) {
   Thread* self = Thread::Current();
-  SirtRef<mirror::DexCache> dex_cache(self, AllocDexCache(self, dex_file));
-  CHECK(dex_cache.get() != NULL) << "Failed to allocate dex cache for " << dex_file.GetLocation();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(AllocDexCache(self, dex_file)));
+  CHECK(dex_cache.Get() != NULL) << "Failed to allocate dex cache for " << dex_file.GetLocation();
   AppendToBootClassPath(dex_file, dex_cache);
 }
 
 void ClassLinker::AppendToBootClassPath(const DexFile& dex_file,
-                                        const SirtRef<mirror::DexCache>& dex_cache) {
-  CHECK(dex_cache.get() != NULL) << dex_file.GetLocation();
+                                        Handle<mirror::DexCache> dex_cache) {
+  CHECK(dex_cache.Get() != NULL) << dex_file.GetLocation();
   boot_class_path_.push_back(&dex_file);
   RegisterDexFile(dex_file, dex_cache);
 }
@@ -2080,12 +2107,12 @@
 }
 
 void ClassLinker::RegisterDexFileLocked(const DexFile& dex_file,
-                                        const SirtRef<mirror::DexCache>& dex_cache) {
+                                        Handle<mirror::DexCache> dex_cache) {
   dex_lock_.AssertExclusiveHeld(Thread::Current());
-  CHECK(dex_cache.get() != NULL) << dex_file.GetLocation();
+  CHECK(dex_cache.Get() != NULL) << dex_file.GetLocation();
   CHECK(dex_cache->GetLocation()->Equals(dex_file.GetLocation()))
       << dex_cache->GetLocation()->ToModifiedUtf8() << " " << dex_file.GetLocation();
-  dex_caches_.push_back(dex_cache.get());
+  dex_caches_.push_back(dex_cache.Get());
   dex_cache->SetDexFile(&dex_file);
   if (log_new_dex_caches_roots_) {
     // TODO: This is not safe if we can remove dex caches.
@@ -2104,8 +2131,9 @@
   // Don't alloc while holding the lock, since allocation may need to
   // suspend all threads and another thread may need the dex_lock_ to
   // get to a suspend point.
-  SirtRef<mirror::DexCache> dex_cache(self, AllocDexCache(self, dex_file));
-  CHECK(dex_cache.get() != NULL) << "Failed to allocate dex cache for " << dex_file.GetLocation();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(AllocDexCache(self, dex_file)));
+  CHECK(dex_cache.Get() != NULL) << "Failed to allocate dex cache for " << dex_file.GetLocation();
   {
     WriterMutexLock mu(self, dex_lock_);
     if (IsDexFileRegisteredLocked(dex_file)) {
@@ -2116,7 +2144,7 @@
 }
 
 void ClassLinker::RegisterDexFile(const DexFile& dex_file,
-                                  const SirtRef<mirror::DexCache>& dex_cache) {
+                                  Handle<mirror::DexCache> dex_cache) {
   WriterMutexLock mu(Thread::Current(), dex_lock_);
   RegisterDexFileLocked(dex_file, dex_cache);
 }
@@ -2167,8 +2195,9 @@
   CHECK(primitive_class != NULL);
   // Must hold lock on object when initializing.
   Thread* self = Thread::Current();
-  SirtRef<mirror::Class> sirt_class(self, primitive_class);
-  ObjectLock<mirror::Class> lock(self, &sirt_class);
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Class> h_class(hs.NewHandle(primitive_class));
+  ObjectLock<mirror::Class> lock(self, h_class);
   primitive_class->SetAccessFlags(kAccPublic | kAccFinal | kAccAbstract);
   primitive_class->SetPrimitiveType(type);
   primitive_class->SetStatus(mirror::Class::kStatusInitialized, self);
@@ -2192,15 +2221,19 @@
 //
 // Returns NULL with an exception raised on failure.
 mirror::Class* ClassLinker::CreateArrayClass(Thread* self, const char* descriptor,
-                                             const SirtRef<mirror::ClassLoader>& class_loader) {
+                                             Handle<mirror::ClassLoader> class_loader) {
   // Identify the underlying component type
   CHECK_EQ('[', descriptor[0]);
-  SirtRef<mirror::Class> component_type(self, FindClass(self, descriptor + 1, class_loader));
-  if (component_type.get() == nullptr) {
+  StackHandleScope<2> hs(self);
+  Handle<mirror::Class> component_type(hs.NewHandle(FindClass(self, descriptor + 1, class_loader)));
+  if (component_type.Get() == nullptr) {
     DCHECK(self->IsExceptionPending());
     return nullptr;
   }
-
+  if (UNLIKELY(component_type->IsPrimitiveVoid())) {
+    ThrowNoClassDefFoundError("Attempt to create array of void primitive type");
+    return nullptr;
+  }
   // See if the component type is already loaded.  Array classes are
   // always associated with the class loader of their underlying
   // element type -- an array of Strings goes with the loader for
@@ -2218,7 +2251,7 @@
   // because we effectively do this lookup again when we add the new
   // class to the hash table --- necessary because of possible races with
   // other threads.)
-  if (class_loader.get() != component_type->GetClassLoader()) {
+  if (class_loader.Get() != component_type->GetClassLoader()) {
     mirror::Class* new_class = LookupClass(descriptor, component_type->GetClassLoader());
     if (new_class != NULL) {
       return new_class;
@@ -2233,35 +2266,35 @@
   //
   // Array classes are simple enough that we don't need to do a full
   // link step.
-  SirtRef<mirror::Class> new_class(self, NULL);
+  auto new_class = hs.NewHandle<mirror::Class>(nullptr);
   if (UNLIKELY(!init_done_)) {
     // Classes that were hand created, ie not by FindSystemClass
     if (strcmp(descriptor, "[Ljava/lang/Class;") == 0) {
-      new_class.reset(GetClassRoot(kClassArrayClass));
+      new_class.Assign(GetClassRoot(kClassArrayClass));
     } else if (strcmp(descriptor, "[Ljava/lang/Object;") == 0) {
-      new_class.reset(GetClassRoot(kObjectArrayClass));
+      new_class.Assign(GetClassRoot(kObjectArrayClass));
     } else if (strcmp(descriptor, class_roots_descriptors_[kJavaLangStringArrayClass]) == 0) {
-      new_class.reset(GetClassRoot(kJavaLangStringArrayClass));
+      new_class.Assign(GetClassRoot(kJavaLangStringArrayClass));
     } else if (strcmp(descriptor,
                       class_roots_descriptors_[kJavaLangReflectArtMethodArrayClass]) == 0) {
-      new_class.reset(GetClassRoot(kJavaLangReflectArtMethodArrayClass));
+      new_class.Assign(GetClassRoot(kJavaLangReflectArtMethodArrayClass));
     } else if (strcmp(descriptor,
                       class_roots_descriptors_[kJavaLangReflectArtFieldArrayClass]) == 0) {
-      new_class.reset(GetClassRoot(kJavaLangReflectArtFieldArrayClass));
+      new_class.Assign(GetClassRoot(kJavaLangReflectArtFieldArrayClass));
     } else if (strcmp(descriptor, "[C") == 0) {
-      new_class.reset(GetClassRoot(kCharArrayClass));
+      new_class.Assign(GetClassRoot(kCharArrayClass));
     } else if (strcmp(descriptor, "[I") == 0) {
-      new_class.reset(GetClassRoot(kIntArrayClass));
+      new_class.Assign(GetClassRoot(kIntArrayClass));
     }
   }
-  if (new_class.get() == nullptr) {
-    new_class.reset(AllocClass(self, sizeof(mirror::Class)));
-    if (new_class.get() == nullptr) {
+  if (new_class.Get() == nullptr) {
+    new_class.Assign(AllocClass(self, sizeof(mirror::Class)));
+    if (new_class.Get() == nullptr) {
       return nullptr;
     }
-    new_class->SetComponentType(component_type.get());
+    new_class->SetComponentType(component_type.Get());
   }
-  ObjectLock<mirror::Class> lock(self, &new_class);  // Must hold lock on object when initializing.
+  ObjectLock<mirror::Class> lock(self, new_class);  // Must hold lock on object when initializing.
   DCHECK(new_class->GetComponentType() != NULL);
   mirror::Class* java_lang_Object = GetClassRoot(kJavaLangObject);
   new_class->SetSuperClass(java_lang_Object);
@@ -2299,9 +2332,9 @@
 
   new_class->SetAccessFlags(access_flags);
 
-  mirror::Class* existing = InsertClass(descriptor, new_class.get(), Hash(descriptor));
+  mirror::Class* existing = InsertClass(descriptor, new_class.Get(), Hash(descriptor));
   if (existing == nullptr) {
-    return new_class.get();
+    return new_class.Get();
   }
   // Another thread must have loaded the class after we
   // started but before we finished.  Abandon what we've
@@ -2376,12 +2409,11 @@
 bool ClassLinker::RemoveClass(const char* descriptor, const mirror::ClassLoader* class_loader) {
   size_t hash = Hash(descriptor);
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  for (auto it = class_table_.lower_bound(hash), end = class_table_.end(); it != end && it->first == hash;
+  for (auto it = class_table_.lower_bound(hash), end = class_table_.end();
+       it != end && it->first == hash;
        ++it) {
     mirror::Class* klass = it->second;
-    ClassHelper kh(klass);
-    if ((klass->GetClassLoader() == class_loader) &&
-        (strcmp(descriptor, kh.GetDescriptor()) == 0)) {
+    if (klass->GetClassLoader() == class_loader && klass->DescriptorEquals(descriptor)) {
       class_table_.erase(it);
       return true;
     }
@@ -2425,16 +2457,13 @@
   auto end = class_table_.end();
   for (auto it = class_table_.lower_bound(hash); it != end && it->first == hash; ++it) {
     mirror::Class* klass = it->second;
-    ClassHelper kh(klass);
-    if ((klass->GetClassLoader() == class_loader) &&
-        (strcmp(descriptor, kh.GetDescriptor()) == 0)) {
+    if (klass->GetClassLoader() == class_loader && klass->DescriptorEquals(descriptor)) {
       if (kIsDebugBuild) {
         // Check for duplicates in the table.
         for (++it; it != end && it->first == hash; ++it) {
           mirror::Class* klass2 = it->second;
-          ClassHelper kh(klass2);
-          CHECK(!((klass2->GetClassLoader() == class_loader) &&
-                  (strcmp(descriptor, kh.GetDescriptor()) == 0)))
+          CHECK(!(klass2->GetClassLoader() == class_loader &&
+              klass2->DescriptorEquals(descriptor)))
               << PrettyClass(klass) << " " << klass << " " << klass->GetClassLoader() << " "
               << PrettyClass(klass2) << " " << klass2 << " " << klass2->GetClassLoader();
         }
@@ -2468,11 +2497,10 @@
     for (int32_t j = 0; j < types->GetLength(); j++) {
       mirror::Class* klass = types->Get(j);
       if (klass != NULL) {
-        ClassHelper kh(klass);
         DCHECK(klass->GetClassLoader() == NULL);
-        const char* descriptor = kh.GetDescriptor();
-        size_t hash = Hash(descriptor);
-        mirror::Class* existing = LookupClassFromTableLocked(descriptor, NULL, hash);
+        std::string descriptor = klass->GetDescriptor();
+        size_t hash = Hash(descriptor.c_str());
+        mirror::Class* existing = LookupClassFromTableLocked(descriptor.c_str(), NULL, hash);
         if (existing != NULL) {
           CHECK(existing == klass) << PrettyClassAndClassLoader(existing) << " != "
               << PrettyClassAndClassLoader(klass);
@@ -2526,17 +2554,16 @@
   for (auto it = class_table_.lower_bound(hash), end = class_table_.end();
       it != end && it->first == hash; ++it) {
     mirror::Class* klass = it->second;
-    ClassHelper kh(klass);
-    if (strcmp(descriptor, kh.GetDescriptor()) == 0) {
+    if (klass->DescriptorEquals(descriptor)) {
       result.push_back(klass);
     }
   }
 }
 
-void ClassLinker::VerifyClass(const SirtRef<mirror::Class>& klass) {
+void ClassLinker::VerifyClass(Handle<mirror::Class> klass) {
   // TODO: assert that the monitor on the Class is held
   Thread* self = Thread::Current();
-  ObjectLock<mirror::Class> lock(self, &klass);
+  ObjectLock<mirror::Class> lock(self, klass);
 
   // Don't attempt to re-verify if already sufficiently verified.
   if (klass->IsVerified() ||
@@ -2547,7 +2574,7 @@
   // The class might already be erroneous, for example at compile time if we attempted to verify
   // this class as a parent to another.
   if (klass->IsErroneous()) {
-    ThrowEarlierClassFailure(klass.get());
+    ThrowEarlierClassFailure(klass.Get());
     return;
   }
 
@@ -2555,7 +2582,7 @@
     klass->SetStatus(mirror::Class::kStatusVerifying, self);
   } else {
     CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusRetryVerificationAtRuntime)
-        << PrettyClass(klass.get());
+        << PrettyClass(klass.Get());
     CHECK(!Runtime::Current()->IsCompiler());
     klass->SetStatus(mirror::Class::kStatusVerifyingAtRuntime, self);
   }
@@ -2567,26 +2594,28 @@
   }
 
   // Verify super class.
-  SirtRef<mirror::Class> super(self, klass->GetSuperClass());
-  if (super.get() != NULL) {
+  StackHandleScope<2> hs(self);
+  Handle<mirror::Class> super(hs.NewHandle(klass->GetSuperClass()));
+  if (super.Get() != NULL) {
     // Acquire lock to prevent races on verifying the super class.
-    ObjectLock<mirror::Class> lock(self, &super);
+    ObjectLock<mirror::Class> lock(self, super);
 
     if (!super->IsVerified() && !super->IsErroneous()) {
       VerifyClass(super);
     }
     if (!super->IsCompileTimeVerified()) {
-      std::string error_msg(StringPrintf("Rejecting class %s that attempts to sub-class erroneous class %s",
-                                         PrettyDescriptor(klass.get()).c_str(),
-                                         PrettyDescriptor(super.get()).c_str()));
+      std::string error_msg(
+          StringPrintf("Rejecting class %s that attempts to sub-class erroneous class %s",
+                       PrettyDescriptor(klass.Get()).c_str(),
+                       PrettyDescriptor(super.Get()).c_str()));
       LOG(ERROR) << error_msg  << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8();
-      SirtRef<mirror::Throwable> cause(self, self->GetException(NULL));
-      if (cause.get() != nullptr) {
+      Handle<mirror::Throwable> cause(hs.NewHandle(self->GetException(nullptr)));
+      if (cause.Get() != nullptr) {
         self->ClearException();
       }
-      ThrowVerifyError(klass.get(), "%s", error_msg.c_str());
-      if (cause.get() != nullptr) {
-        self->GetException(nullptr)->SetCause(cause.get());
+      ThrowVerifyError(klass.Get(), "%s", error_msg.c_str());
+      if (cause.Get() != nullptr) {
+        self->GetException(nullptr)->SetCause(cause.Get());
       }
       ClassReference ref(klass->GetDexCache()->GetDexFile(), klass->GetDexClassDefIndex());
       if (Runtime::Current()->IsCompiler()) {
@@ -2600,26 +2629,26 @@
   // Try to use verification information from the oat file, otherwise do runtime verification.
   const DexFile& dex_file = *klass->GetDexCache()->GetDexFile();
   mirror::Class::Status oat_file_class_status(mirror::Class::kStatusNotReady);
-  bool preverified = VerifyClassUsingOatFile(dex_file, klass.get(), oat_file_class_status);
+  bool preverified = VerifyClassUsingOatFile(dex_file, klass.Get(), oat_file_class_status);
   if (oat_file_class_status == mirror::Class::kStatusError) {
     VLOG(class_linker) << "Skipping runtime verification of erroneous class "
-        << PrettyDescriptor(klass.get()) << " in "
+        << PrettyDescriptor(klass.Get()) << " in "
         << klass->GetDexCache()->GetLocation()->ToModifiedUtf8();
-    ThrowVerifyError(klass.get(), "Rejecting class %s because it failed compile-time verification",
-                     PrettyDescriptor(klass.get()).c_str());
+    ThrowVerifyError(klass.Get(), "Rejecting class %s because it failed compile-time verification",
+                     PrettyDescriptor(klass.Get()).c_str());
     klass->SetStatus(mirror::Class::kStatusError, self);
     return;
   }
   verifier::MethodVerifier::FailureKind verifier_failure = verifier::MethodVerifier::kNoFailure;
   std::string error_msg;
   if (!preverified) {
-    verifier_failure = verifier::MethodVerifier::VerifyClass(klass.get(),
+    verifier_failure = verifier::MethodVerifier::VerifyClass(klass.Get(),
                                                              Runtime::Current()->IsCompiler(),
                                                              &error_msg);
   }
   if (preverified || verifier_failure != verifier::MethodVerifier::kHardFailure) {
     if (!preverified && verifier_failure != verifier::MethodVerifier::kNoFailure) {
-      VLOG(class_linker) << "Soft verification failure in class " << PrettyDescriptor(klass.get())
+      VLOG(class_linker) << "Soft verification failure in class " << PrettyDescriptor(klass.Get())
           << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8()
           << " because: " << error_msg;
     }
@@ -2629,7 +2658,7 @@
     if (verifier_failure == verifier::MethodVerifier::kNoFailure) {
       // Even though there were no verifier failures we need to respect whether the super-class
       // was verified or requiring runtime reverification.
-      if (super.get() == NULL || super->IsVerified()) {
+      if (super.Get() == NULL || super->IsVerified()) {
         klass->SetStatus(mirror::Class::kStatusVerified, self);
       } else {
         CHECK_EQ(super->GetStatus(), mirror::Class::kStatusRetryVerificationAtRuntime);
@@ -2649,11 +2678,11 @@
       }
     }
   } else {
-    LOG(ERROR) << "Verification failed on class " << PrettyDescriptor(klass.get())
+    LOG(ERROR) << "Verification failed on class " << PrettyDescriptor(klass.Get())
         << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8()
         << " because: " << error_msg;
     self->AssertNoPendingException();
-    ThrowVerifyError(klass.get(), "%s", error_msg.c_str());
+    ThrowVerifyError(klass.Get(), "%s", error_msg.c_str());
     klass->SetStatus(mirror::Class::kStatusError, self);
   }
   if (preverified || verifier_failure == verifier::MethodVerifier::kNoFailure) {
@@ -2738,13 +2767,13 @@
   }
   LOG(FATAL) << "Unexpected class status: " << oat_file_class_status
              << " " << dex_file.GetLocation() << " " << PrettyClass(klass) << " "
-             << ClassHelper(klass).GetDescriptor();
+             << klass->GetDescriptor();
 
   return false;
 }
 
 void ClassLinker::ResolveClassExceptionHandlerTypes(const DexFile& dex_file,
-                                                    const SirtRef<mirror::Class>& klass) {
+                                                    Handle<mirror::Class> klass) {
   for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
     ResolveMethodExceptionHandlerTypes(dex_file, klass->GetDirectMethod(i));
   }
@@ -2785,15 +2814,16 @@
 
 static void CheckProxyConstructor(mirror::ArtMethod* constructor);
 static void CheckProxyMethod(mirror::ArtMethod* method,
-                             SirtRef<mirror::ArtMethod>& prototype);
+                             Handle<mirror::ArtMethod> prototype);
 
-mirror::Class* ClassLinker::CreateProxyClass(ScopedObjectAccess& soa, jstring name,
+mirror::Class* ClassLinker::CreateProxyClass(ScopedObjectAccessAlreadyRunnable& soa, jstring name,
                                              jobjectArray interfaces, jobject loader,
                                              jobjectArray methods, jobjectArray throws) {
   Thread* self = soa.Self();
-  SirtRef<mirror::Class> klass(self, AllocClass(self, GetClassRoot(kJavaLangClass),
-                                                sizeof(mirror::SynthesizedProxyClass)));
-  if (klass.get() == NULL) {
+  StackHandleScope<8> hs(self);
+  Handle<mirror::Class> klass(hs.NewHandle(AllocClass(self, GetClassRoot(kJavaLangClass),
+                                                      sizeof(mirror::SynthesizedProxyClass))));
+  if (klass.Get() == NULL) {
     CHECK(self->IsExceptionPending());  // OOME.
     return NULL;
   }
@@ -2818,38 +2848,38 @@
   }
   // 1. Create a static field 'interfaces' that holds the _declared_ interfaces implemented by
   // our proxy, so Class.getInterfaces doesn't return the flattened set.
-  SirtRef<mirror::ArtField> interfaces_sfield(self, AllocArtField(self));
-  if (UNLIKELY(interfaces_sfield.get() == NULL)) {
+  Handle<mirror::ArtField> interfaces_sfield(hs.NewHandle(AllocArtField(self)));
+  if (UNLIKELY(interfaces_sfield.Get() == nullptr)) {
     CHECK(self->IsExceptionPending());  // OOME.
-    return NULL;
+    return nullptr;
   }
-  klass->SetStaticField(0, interfaces_sfield.get());
+  klass->SetStaticField(0, interfaces_sfield.Get());
   interfaces_sfield->SetDexFieldIndex(0);
-  interfaces_sfield->SetDeclaringClass(klass.get());
+  interfaces_sfield->SetDeclaringClass(klass.Get());
   interfaces_sfield->SetAccessFlags(kAccStatic | kAccPublic | kAccFinal);
   // 2. Create a static field 'throws' that holds exceptions thrown by our methods.
-  SirtRef<mirror::ArtField> throws_sfield(self, AllocArtField(self));
-  if (UNLIKELY(throws_sfield.get() == NULL)) {
+  Handle<mirror::ArtField> throws_sfield(hs.NewHandle(AllocArtField(self)));
+  if (UNLIKELY(throws_sfield.Get() == nullptr)) {
     CHECK(self->IsExceptionPending());  // OOME.
-    return NULL;
+    return nullptr;
   }
-  klass->SetStaticField(1, throws_sfield.get());
+  klass->SetStaticField(1, throws_sfield.Get());
   throws_sfield->SetDexFieldIndex(1);
-  throws_sfield->SetDeclaringClass(klass.get());
+  throws_sfield->SetDeclaringClass(klass.Get());
   throws_sfield->SetAccessFlags(kAccStatic | kAccPublic | kAccFinal);
 
   // Proxies have 1 direct method, the constructor
   {
     mirror::ObjectArray<mirror::ArtMethod>* directs = AllocArtMethodArray(self, 1);
-    if (UNLIKELY(directs == NULL)) {
+    if (UNLIKELY(directs == nullptr)) {
       CHECK(self->IsExceptionPending());  // OOME.
-      return NULL;
+      return nullptr;
     }
     klass->SetDirectMethods(directs);
     mirror::ArtMethod* constructor = CreateProxyConstructor(self, klass, proxy_class);
-    if (UNLIKELY(constructor == NULL)) {
+    if (UNLIKELY(constructor == nullptr)) {
       CHECK(self->IsExceptionPending());  // OOME.
-      return NULL;
+      return nullptr;
     }
     klass->SetDirectMethod(0, constructor);
   }
@@ -2858,7 +2888,8 @@
   size_t num_virtual_methods =
       soa.Decode<mirror::ObjectArray<mirror::ArtMethod>*>(methods)->GetLength();
   {
-    mirror::ObjectArray<mirror::ArtMethod>* virtuals = AllocArtMethodArray(self, num_virtual_methods);
+    mirror::ObjectArray<mirror::ArtMethod>* virtuals = AllocArtMethodArray(self,
+                                                                           num_virtual_methods);
     if (UNLIKELY(virtuals == NULL)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return NULL;
@@ -2866,44 +2897,48 @@
     klass->SetVirtualMethods(virtuals);
   }
   for (size_t i = 0; i < num_virtual_methods; ++i) {
+    StackHandleScope<1> hs(self);
     mirror::ObjectArray<mirror::ArtMethod>* decoded_methods =
         soa.Decode<mirror::ObjectArray<mirror::ArtMethod>*>(methods);
-    SirtRef<mirror::ArtMethod> prototype(self, decoded_methods->Get(i));
+    Handle<mirror::ArtMethod> prototype(hs.NewHandle(decoded_methods->Get(i)));
     mirror::ArtMethod* clone = CreateProxyMethod(self, klass, prototype);
-    if (UNLIKELY(clone == NULL)) {
+    if (UNLIKELY(clone == nullptr)) {
       CHECK(self->IsExceptionPending());  // OOME.
-      return NULL;
+      return nullptr;
     }
     klass->SetVirtualMethod(i, clone);
   }
 
   klass->SetSuperClass(proxy_class);  // The super class is java.lang.reflect.Proxy
-  klass->SetStatus(mirror::Class::kStatusLoaded, self);  // Class is now effectively in the loaded state
+  klass->SetStatus(mirror::Class::kStatusLoaded, self);  // Now effectively in the loaded state.
   self->AssertNoPendingException();
 
   {
-    ObjectLock<mirror::Class> lock(self, &klass);  // Must hold lock on object when resolved.
+    ObjectLock<mirror::Class> lock(self, klass);  // Must hold lock on object when resolved.
     // Link the fields and virtual methods, creating vtable and iftables
-    SirtRef<mirror::ObjectArray<mirror::Class> > sirt_interfaces(
-        self, soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
-    if (!LinkClass(self, klass, sirt_interfaces)) {
+    Handle<mirror::ObjectArray<mirror::Class>> h_interfaces(
+        hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces)));
+    if (!LinkClass(self, klass, h_interfaces)) {
       klass->SetStatus(mirror::Class::kStatusError, self);
       return nullptr;
     }
 
-    interfaces_sfield->SetObject<false>(klass.get(), soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
-    throws_sfield->SetObject<false>(klass.get(), soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >*>(throws));
+    interfaces_sfield->SetObject<false>(
+        klass.Get(), soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
+    throws_sfield->SetObject<false>(
+        klass.Get(), soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class>>*>(throws));
     klass->SetStatus(mirror::Class::kStatusInitialized, self);
   }
 
   // sanity checks
   if (kIsDebugBuild) {
-    CHECK(klass->GetIFields() == NULL);
+    CHECK(klass->GetIFields() == nullptr);
     CheckProxyConstructor(klass->GetDirectMethod(0));
     for (size_t i = 0; i < num_virtual_methods; ++i) {
+      StackHandleScope<1> hs(self);
       mirror::ObjectArray<mirror::ArtMethod>* decoded_methods =
           soa.Decode<mirror::ObjectArray<mirror::ArtMethod>*>(methods);
-      SirtRef<mirror::ArtMethod> prototype(self, decoded_methods->Get(i));
+      Handle<mirror::ArtMethod> prototype(hs.NewHandle(decoded_methods->Get(i)));
       CheckProxyMethod(klass->GetVirtualMethod(i), prototype);
     }
 
@@ -2917,14 +2952,16 @@
     CHECK_EQ(PrettyField(klass->GetStaticField(1)), throws_field_name);
 
     mirror::SynthesizedProxyClass* synth_proxy_class =
-        down_cast<mirror::SynthesizedProxyClass*>(klass.get());
-    CHECK_EQ(synth_proxy_class->GetInterfaces(), soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
-    CHECK_EQ(synth_proxy_class->GetThrows(), soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >*>(throws));
+        down_cast<mirror::SynthesizedProxyClass*>(klass.Get());
+    CHECK_EQ(synth_proxy_class->GetInterfaces(),
+             soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
+    CHECK_EQ(synth_proxy_class->GetThrows(),
+             soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class>>*>(throws));
   }
-  std::string descriptor(GetDescriptorForProxy(klass.get()));
-  mirror::Class* existing = InsertClass(descriptor.c_str(), klass.get(), Hash(descriptor.c_str()));
+  std::string descriptor(GetDescriptorForProxy(klass.Get()));
+  mirror::Class* existing = InsertClass(descriptor.c_str(), klass.Get(), Hash(descriptor.c_str()));
   CHECK(existing == nullptr);
-  return klass.get();
+  return klass.Get();
 }
 
 std::string ClassLinker::GetDescriptorForProxy(mirror::Class* proxy_class) {
@@ -2959,7 +2996,7 @@
 
 
 mirror::ArtMethod* ClassLinker::CreateProxyConstructor(Thread* self,
-                                                       const SirtRef<mirror::Class>& klass,
+                                                       Handle<mirror::Class> klass,
                                                        mirror::Class* proxy_class) {
   // Create constructor for Proxy that must initialize h
   mirror::ObjectArray<mirror::ArtMethod>* proxy_direct_methods =
@@ -2976,7 +3013,7 @@
   }
   // Make this constructor public and fix the class to be our Proxy version
   constructor->SetAccessFlags((constructor->GetAccessFlags() & ~kAccProtected) | kAccPublic);
-  constructor->SetDeclaringClass(klass.get());
+  constructor->SetDeclaringClass(klass.Get());
   return constructor;
 }
 
@@ -2990,12 +3027,12 @@
 }
 
 mirror::ArtMethod* ClassLinker::CreateProxyMethod(Thread* self,
-                                                  const SirtRef<mirror::Class>& klass,
-                                                  const SirtRef<mirror::ArtMethod>& prototype) {
+                                                  Handle<mirror::Class> klass,
+                                                  Handle<mirror::ArtMethod> prototype) {
   // Ensure prototype is in dex cache so that we can use the dex cache to look up the overridden
   // prototype method
   prototype->GetDeclaringClass()->GetDexCache()->SetResolvedMethod(prototype->GetDexMethodIndex(),
-                                                                   prototype.get());
+                                                                   prototype.Get());
   // We steal everything from the prototype (such as DexCache, invoke stub, etc.) then specialize
   // as necessary
   mirror::ArtMethod* method = down_cast<mirror::ArtMethod*>(prototype->Clone(self));
@@ -3006,16 +3043,11 @@
 
   // Set class to be the concrete proxy class and clear the abstract flag, modify exceptions to
   // the intersection of throw exceptions as defined in Proxy
-  method->SetDeclaringClass(klass.get());
+  method->SetDeclaringClass(klass.Get());
   method->SetAccessFlags((method->GetAccessFlags() & ~kAccAbstract) | kAccFinal);
 
   // At runtime the method looks like a reference and argument saving method, clone the code
   // related parameters from this method.
-  mirror::ArtMethod* refs_and_args =
-      Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
-  method->SetCoreSpillMask(refs_and_args->GetCoreSpillMask());
-  method->SetFpSpillMask(refs_and_args->GetFpSpillMask());
-  method->SetFrameSizeInBytes(refs_and_args->GetFrameSizeInBytes());
   method->SetEntryPointFromQuickCompiledCode(GetQuickProxyInvokeHandler());
   method->SetEntryPointFromPortableCompiledCode(GetPortableProxyInvokeHandler());
   method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
@@ -3023,8 +3055,7 @@
   return method;
 }
 
-static void CheckProxyMethod(mirror::ArtMethod* method,
-                             SirtRef<mirror::ArtMethod>& prototype)
+static void CheckProxyMethod(mirror::ArtMethod* method, Handle<mirror::ArtMethod> prototype)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Basic sanity
   CHECK(!prototype->IsFinal());
@@ -3039,7 +3070,7 @@
   CHECK_EQ(prototype->GetDexMethodIndex(), method->GetDexMethodIndex());
 
   MethodHelper mh(method);
-  MethodHelper mh2(prototype.get());
+  MethodHelper mh2(prototype.Get());
   CHECK_STREQ(mh.GetName(), mh2.GetName());
   CHECK_STREQ(mh.GetShorty(), mh2.GetShorty());
   // More complex sanity - via dex cache
@@ -3060,8 +3091,7 @@
     }
     // Check if there are encoded static values needing initialization.
     if (klass->NumStaticFields() != 0) {
-      ClassHelper kh(klass);
-      const DexFile::ClassDef* dex_class_def = kh.GetClassDef();
+      const DexFile::ClassDef* dex_class_def = klass->GetClassDef();
       DCHECK(dex_class_def != NULL);
       if (dex_class_def->static_values_off_ != 0) {
         return false;
@@ -3085,7 +3115,7 @@
   return init_done_;
 }
 
-bool ClassLinker::InitializeClass(const SirtRef<mirror::Class>& klass, bool can_init_statics,
+bool ClassLinker::InitializeClass(Handle<mirror::Class> klass, bool can_init_statics,
                                   bool can_init_parents) {
   // see JLS 3rd edition, 12.4.2 "Detailed Initialization Procedure" for the locking protocol
 
@@ -3097,14 +3127,14 @@
   }
 
   // Fast fail if initialization requires a full runtime. Not part of the JLS.
-  if (!CanWeInitializeClass(klass.get(), can_init_statics, can_init_parents)) {
+  if (!CanWeInitializeClass(klass.Get(), can_init_statics, can_init_parents)) {
     return false;
   }
 
   Thread* self = Thread::Current();
   uint64_t t0;
   {
-    ObjectLock<mirror::Class> lock(self, &klass);
+    ObjectLock<mirror::Class> lock(self, klass);
 
     // Re-check under the lock in case another thread initialized ahead of us.
     if (klass->IsInitialized()) {
@@ -3113,11 +3143,11 @@
 
     // Was the class already found to be erroneous? Done under the lock to match the JLS.
     if (klass->IsErroneous()) {
-      ThrowEarlierClassFailure(klass.get());
+      ThrowEarlierClassFailure(klass.Get());
       return false;
     }
 
-    CHECK(klass->IsResolved()) << PrettyClass(klass.get()) << ": state=" << klass->GetStatus();
+    CHECK(klass->IsResolved()) << PrettyClass(klass.Get()) << ": state=" << klass->GetStatus();
 
     if (!klass->IsVerified()) {
       VerifyClass(klass);
@@ -3154,7 +3184,7 @@
       return false;
     }
 
-    CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusVerified) << PrettyClass(klass.get());
+    CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusVerified) << PrettyClass(klass.Get());
 
     // From here out other threads may observe that we're initializing and so changes of state
     // require a notification.
@@ -3170,17 +3200,19 @@
     if (!super_class->IsInitialized()) {
       CHECK(!super_class->IsInterface());
       CHECK(can_init_parents);
-      SirtRef<mirror::Class> sirt_super(self, super_class);
-      bool super_initialized = InitializeClass(sirt_super, can_init_statics, true);
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> handle_scope_super(hs.NewHandle(super_class));
+      bool super_initialized = InitializeClass(handle_scope_super, can_init_statics, true);
       if (!super_initialized) {
         // The super class was verified ahead of entering initializing, we should only be here if
         // the super class became erroneous due to initialization.
-        CHECK(sirt_super->IsErroneous() && self->IsExceptionPending())
-            << "Super class initialization failed for " << PrettyDescriptor(sirt_super.get())
-            << " that has unexpected status " << sirt_super->GetStatus()
+        CHECK(handle_scope_super->IsErroneous() && self->IsExceptionPending())
+            << "Super class initialization failed for "
+            << PrettyDescriptor(handle_scope_super.Get())
+            << " that has unexpected status " << handle_scope_super->GetStatus()
             << "\nPending exception:\n"
             << (self->GetException(NULL) != NULL ? self->GetException(NULL)->Dump() : "");
-        ObjectLock<mirror::Class> lock(self, &klass);
+        ObjectLock<mirror::Class> lock(self, klass);
         // Initialization failed because the super-class is erroneous.
         klass->SetStatus(mirror::Class::kStatusError, self);
         return false;
@@ -3189,19 +3221,20 @@
   }
 
   if (klass->NumStaticFields() > 0) {
-    ClassHelper kh(klass.get());
-    const DexFile::ClassDef* dex_class_def = kh.GetClassDef();
+    const DexFile::ClassDef* dex_class_def = klass->GetClassDef();
     CHECK(dex_class_def != NULL);
-    const DexFile& dex_file = kh.GetDexFile();
-    SirtRef<mirror::ClassLoader> class_loader(self, klass->GetClassLoader());
-    SirtRef<mirror::DexCache> dex_cache(self, kh.GetDexCache());
+    const DexFile& dex_file = klass->GetDexFile();
+    StackHandleScope<2> hs(self);
+    Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
     EncodedStaticFieldValueIterator it(dex_file, &dex_cache, &class_loader,
                                        this, *dex_class_def);
     if (it.HasNext()) {
       CHECK(can_init_statics);
-      // We reordered the fields, so we need to be able to map the field indexes to the right fields.
+      // We reordered the fields, so we need to be able to map the
+      // field indexes to the right fields.
       SafeMap<uint32_t, mirror::ArtField*> field_map;
-      ConstructFieldMap(dex_file, *dex_class_def, klass.get(), field_map);
+      ConstructFieldMap(dex_file, *dex_class_def, klass.Get(), field_map);
       for (size_t i = 0; it.HasNext(); i++, it.Next()) {
         if (Runtime::Current()->IsActiveTransaction()) {
           it.ReadValueToField<true>(field_map.Get(i));
@@ -3215,19 +3248,15 @@
   mirror::ArtMethod* clinit = klass->FindClassInitializer();
   if (clinit != NULL) {
     CHECK(can_init_statics);
-    if (LIKELY(Runtime::Current()->IsStarted())) {
-      JValue result;
-      clinit->Invoke(self, NULL, 0, &result, "V");
-    } else {
-      art::interpreter::EnterInterpreterFromInvoke(self, clinit, NULL, NULL, NULL);
-    }
+    JValue result;
+    clinit->Invoke(self, NULL, 0, &result, "V");
   }
 
   uint64_t t1 = NanoTime();
 
   bool success = true;
   {
-    ObjectLock<mirror::Class> lock(self, &klass);
+    ObjectLock<mirror::Class> lock(self, klass);
 
     if (self->IsExceptionPending()) {
       WrapExceptionInInitializer();
@@ -3243,17 +3272,17 @@
       // Set the class as initialized except if failed to initialize static fields.
       klass->SetStatus(mirror::Class::kStatusInitialized, self);
       if (VLOG_IS_ON(class_linker)) {
-        ClassHelper kh(klass.get());
-        LOG(INFO) << "Initialized class " << kh.GetDescriptor() << " from " << kh.GetLocation();
+        LOG(INFO) << "Initialized class " << klass->GetDescriptor() << " from " <<
+            klass->GetLocation();
       }
       // Opportunistically set static method trampolines to their destination.
-      FixupStaticTrampolines(klass.get());
+      FixupStaticTrampolines(klass.Get());
     }
   }
   return success;
 }
 
-bool ClassLinker::WaitForInitializeClass(const SirtRef<mirror::Class>& klass, Thread* self,
+bool ClassLinker::WaitForInitializeClass(Handle<mirror::Class> klass, Thread* self,
                                          ObjectLock<mirror::Class>& lock)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   while (true) {
@@ -3281,49 +3310,52 @@
       // The caller wants an exception, but it was thrown in a
       // different thread.  Synthesize one here.
       ThrowNoClassDefFoundError("<clinit> failed for class %s; see exception in other thread",
-                                PrettyDescriptor(klass.get()).c_str());
+                                PrettyDescriptor(klass.Get()).c_str());
       return false;
     }
     if (klass->IsInitialized()) {
       return true;
     }
-    LOG(FATAL) << "Unexpected class status. " << PrettyClass(klass.get()) << " is "
+    LOG(FATAL) << "Unexpected class status. " << PrettyClass(klass.Get()) << " is "
         << klass->GetStatus();
   }
-  LOG(FATAL) << "Not Reached" << PrettyClass(klass.get());
+  LOG(FATAL) << "Not Reached" << PrettyClass(klass.Get());
 }
 
-bool ClassLinker::ValidateSuperClassDescriptors(const SirtRef<mirror::Class>& klass) {
+bool ClassLinker::ValidateSuperClassDescriptors(Handle<mirror::Class> klass) {
   if (klass->IsInterface()) {
     return true;
   }
-  Thread* self = Thread::Current();
-  // begin with the methods local to the superclass
+  // Begin with the methods local to the superclass.
+  MethodHelper mh;
+  MethodHelper super_mh;
   if (klass->HasSuperClass() &&
       klass->GetClassLoader() != klass->GetSuperClass()->GetClassLoader()) {
-    SirtRef<mirror::Class> super(self, klass->GetSuperClass());
-    for (int i = super->GetVTable()->GetLength() - 1; i >= 0; --i) {
-      mirror::ArtMethod* method = klass->GetVTable()->Get(i);
-      if (method != super->GetVTable()->Get(i) &&
-          !IsSameMethodSignatureInDifferentClassContexts(self, method, super.get(), klass.get())) {
-        ThrowLinkageError(klass.get(), "Class %s method %s resolves differently in superclass %s",
-                          PrettyDescriptor(klass.get()).c_str(), PrettyMethod(method).c_str(),
-                          PrettyDescriptor(super.get()).c_str());
+    for (int i = klass->GetSuperClass()->GetVTable()->GetLength() - 1; i >= 0; --i) {
+      mh.ChangeMethod(klass->GetVTable()->GetWithoutChecks(i));
+      super_mh.ChangeMethod(klass->GetSuperClass()->GetVTable()->GetWithoutChecks(i));
+      bool is_override = mh.GetMethod() != super_mh.GetMethod();
+      if (is_override && !mh.HasSameSignatureWithDifferentClassLoaders(&super_mh)) {
+        ThrowLinkageError(klass.Get(), "Class %s method %s resolves differently in superclass %s",
+                          PrettyDescriptor(klass.Get()).c_str(),
+                          PrettyMethod(mh.GetMethod()).c_str(),
+                          PrettyDescriptor(klass->GetSuperClass()).c_str());
         return false;
       }
     }
   }
   for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
-    SirtRef<mirror::Class> interface(self, klass->GetIfTable()->GetInterface(i));
-    if (klass->GetClassLoader() != interface->GetClassLoader()) {
-      for (size_t j = 0; j < interface->NumVirtualMethods(); ++j) {
-        mirror::ArtMethod* method = klass->GetIfTable()->GetMethodArray(i)->Get(j);
-        if (!IsSameMethodSignatureInDifferentClassContexts(self, method, interface.get(),
-                                                           method->GetDeclaringClass())) {
-          ThrowLinkageError(klass.get(), "Class %s method %s resolves differently in interface %s",
-                            PrettyDescriptor(method->GetDeclaringClass()).c_str(),
-                            PrettyMethod(method).c_str(),
-                            PrettyDescriptor(interface.get()).c_str());
+    if (klass->GetClassLoader() != klass->GetIfTable()->GetInterface(i)->GetClassLoader()) {
+      uint32_t num_methods = klass->GetIfTable()->GetInterface(i)->NumVirtualMethods();
+      for (uint32_t j = 0; j < num_methods; ++j) {
+        mh.ChangeMethod(klass->GetIfTable()->GetMethodArray(i)->GetWithoutChecks(j));
+        super_mh.ChangeMethod(klass->GetIfTable()->GetInterface(i)->GetVirtualMethod(j));
+        bool is_override = mh.GetMethod() != super_mh.GetMethod();
+        if (is_override && !mh.HasSameSignatureWithDifferentClassLoaders(&super_mh)) {
+          ThrowLinkageError(klass.Get(), "Class %s method %s resolves differently in interface %s",
+                            PrettyDescriptor(klass.Get()).c_str(),
+                            PrettyMethod(mh.GetMethod()).c_str(),
+                            PrettyDescriptor(klass->GetIfTable()->GetInterface(i)).c_str());
           return false;
         }
       }
@@ -3332,72 +3364,12 @@
   return true;
 }
 
-// Returns true if classes referenced by the signature of the method are the
-// same classes in klass1 as they are in klass2.
-bool ClassLinker::IsSameMethodSignatureInDifferentClassContexts(Thread* self,
-                                                                mirror::ArtMethod* method,
-                                                                mirror::Class* klass1,
-                                                                mirror::Class* klass2) {
-  if (klass1 == klass2) {
-    return true;
-  }
-  CHECK(klass1 != nullptr);
-  CHECK(klass2 != nullptr);
-  SirtRef<mirror::ClassLoader> loader1(self, klass1->GetClassLoader());
-  SirtRef<mirror::ClassLoader> loader2(self, klass2->GetClassLoader());
-  const DexFile& dex_file = *method->GetDeclaringClass()->GetDexCache()->GetDexFile();
-  const DexFile::ProtoId& proto_id =
-      dex_file.GetMethodPrototype(dex_file.GetMethodId(method->GetDexMethodIndex()));
-  for (DexFileParameterIterator it(dex_file, proto_id); it.HasNext(); it.Next()) {
-    const char* descriptor = it.GetDescriptor();
-    if (descriptor == nullptr) {
-      break;
-    }
-    if (descriptor[0] == 'L' || descriptor[0] == '[') {
-      // Found a non-primitive type.
-      if (!IsSameDescriptorInDifferentClassContexts(self, descriptor, loader1, loader2)) {
-        return false;
-      }
-    }
-  }
-  // Check the return type
-  const char* descriptor = dex_file.GetReturnTypeDescriptor(proto_id);
-  if (descriptor[0] == 'L' || descriptor[0] == '[') {
-    if (!IsSameDescriptorInDifferentClassContexts(self, descriptor, loader1, loader2)) {
-      return false;
-    }
-  }
-  return true;
-}
-
-// Returns true if the descriptor resolves to the same class in the context of loader1 and loader2.
-bool ClassLinker::IsSameDescriptorInDifferentClassContexts(Thread* self, const char* descriptor,
-                                                           SirtRef<mirror::ClassLoader>& loader1,
-                                                           SirtRef<mirror::ClassLoader>& loader2) {
-  CHECK(descriptor != nullptr);
-  SirtRef<mirror::Class> found1(self, FindClass(self, descriptor, loader1));
-  if (found1.get() == nullptr) {
-    self->ClearException();
-  }
-  mirror::Class* found2 = FindClass(self, descriptor, loader2);
-  if (found2 == nullptr) {
-    self->ClearException();
-  }
-  return found1.get() == found2;
-}
-
-bool ClassLinker::EnsureInitialized(const SirtRef<mirror::Class>& c, bool can_init_fields,
+bool ClassLinker::EnsureInitialized(Handle<mirror::Class> c, bool can_init_fields,
                                     bool can_init_parents) {
-  DCHECK(c.get() != NULL);
-  if (c->IsInitialized()) {
-    return true;
-  }
-
-  bool success = InitializeClass(c, can_init_fields, can_init_parents);
-  if (!success) {
-    if (can_init_fields && can_init_parents) {
-      CHECK(Thread::Current()->IsExceptionPending()) << PrettyClass(c.get());
-    }
+  DCHECK(c.Get() != nullptr);
+  const bool success = c->IsInitialized() || InitializeClass(c, can_init_fields, can_init_parents);
+  if (!success && can_init_fields && can_init_parents) {
+    CHECK(Thread::Current()->IsExceptionPending()) << PrettyClass(c.Get());
   }
   return success;
 }
@@ -3407,17 +3379,17 @@
                                     SafeMap<uint32_t, mirror::ArtField*>& field_map) {
   const byte* class_data = dex_file.GetClassData(dex_class_def);
   ClassDataItemIterator it(dex_file, class_data);
-  Thread* self = Thread::Current();
-  SirtRef<mirror::DexCache> dex_cache(self, c->GetDexCache());
-  SirtRef<mirror::ClassLoader> class_loader(self, c->GetClassLoader());
+  StackHandleScope<2> hs(Thread::Current());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(c->GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(c->GetClassLoader()));
   CHECK(!kMovingFields);
   for (size_t i = 0; it.HasNextStaticField(); i++, it.Next()) {
     field_map.Put(i, ResolveField(dex_file, it.GetMemberIndex(), dex_cache, class_loader, true));
   }
 }
 
-bool ClassLinker::LinkClass(Thread* self, const SirtRef<mirror::Class>& klass,
-                            const SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces) {
+bool ClassLinker::LinkClass(Thread* self, Handle<mirror::Class> klass,
+                            Handle<mirror::ObjectArray<mirror::Class>> interfaces) {
   CHECK_EQ(mirror::Class::kStatusLoaded, klass->GetStatus());
   if (!LinkSuperClass(klass)) {
     return false;
@@ -3438,22 +3410,21 @@
   return true;
 }
 
-bool ClassLinker::LoadSuperAndInterfaces(const SirtRef<mirror::Class>& klass,
-                                         const DexFile& dex_file) {
+bool ClassLinker::LoadSuperAndInterfaces(Handle<mirror::Class> klass, const DexFile& dex_file) {
   CHECK_EQ(mirror::Class::kStatusIdx, klass->GetStatus());
   const DexFile::ClassDef& class_def = dex_file.GetClassDef(klass->GetDexClassDefIndex());
   uint16_t super_class_idx = class_def.superclass_idx_;
   if (super_class_idx != DexFile::kDexNoIndex16) {
-    mirror::Class* super_class = ResolveType(dex_file, super_class_idx, klass.get());
+    mirror::Class* super_class = ResolveType(dex_file, super_class_idx, klass.Get());
     if (super_class == NULL) {
       DCHECK(Thread::Current()->IsExceptionPending());
       return false;
     }
     // Verify
     if (!klass->CanAccess(super_class)) {
-      ThrowIllegalAccessError(klass.get(), "Class %s extended by class %s is inaccessible",
+      ThrowIllegalAccessError(klass.Get(), "Class %s extended by class %s is inaccessible",
                               PrettyDescriptor(super_class).c_str(),
-                              PrettyDescriptor(klass.get()).c_str());
+                              PrettyDescriptor(klass.Get()).c_str());
       return false;
     }
     klass->SetSuperClass(super_class);
@@ -3462,7 +3433,7 @@
   if (interfaces != NULL) {
     for (size_t i = 0; i < interfaces->Size(); i++) {
       uint16_t idx = interfaces->GetTypeItem(i).type_idx_;
-      mirror::Class* interface = ResolveType(dex_file, idx, klass.get());
+      mirror::Class* interface = ResolveType(dex_file, idx, klass.Get());
       if (interface == NULL) {
         DCHECK(Thread::Current()->IsExceptionPending());
         return false;
@@ -3470,9 +3441,9 @@
       // Verify
       if (!klass->CanAccess(interface)) {
         // TODO: the RI seemed to ignore this in my testing.
-        ThrowIllegalAccessError(klass.get(), "Interface %s implemented by class %s is inaccessible",
+        ThrowIllegalAccessError(klass.Get(), "Interface %s implemented by class %s is inaccessible",
                                 PrettyDescriptor(interface).c_str(),
-                                PrettyDescriptor(klass.get()).c_str());
+                                PrettyDescriptor(klass.Get()).c_str());
         return false;
       }
     }
@@ -3482,37 +3453,38 @@
   return true;
 }
 
-bool ClassLinker::LinkSuperClass(const SirtRef<mirror::Class>& klass) {
+bool ClassLinker::LinkSuperClass(Handle<mirror::Class> klass) {
   CHECK(!klass->IsPrimitive());
   mirror::Class* super = klass->GetSuperClass();
-  if (klass.get() == GetClassRoot(kJavaLangObject)) {
+  if (klass.Get() == GetClassRoot(kJavaLangObject)) {
     if (super != NULL) {
-      ThrowClassFormatError(klass.get(), "java.lang.Object must not have a superclass");
+      ThrowClassFormatError(klass.Get(), "java.lang.Object must not have a superclass");
       return false;
     }
     return true;
   }
   if (super == NULL) {
-    ThrowLinkageError(klass.get(), "No superclass defined for class %s",
-                      PrettyDescriptor(klass.get()).c_str());
+    ThrowLinkageError(klass.Get(), "No superclass defined for class %s",
+                      PrettyDescriptor(klass.Get()).c_str());
     return false;
   }
   // Verify
   if (super->IsFinal() || super->IsInterface()) {
-    ThrowIncompatibleClassChangeError(klass.get(), "Superclass %s of %s is %s",
+    ThrowIncompatibleClassChangeError(klass.Get(), "Superclass %s of %s is %s",
                                       PrettyDescriptor(super).c_str(),
-                                      PrettyDescriptor(klass.get()).c_str(),
+                                      PrettyDescriptor(klass.Get()).c_str(),
                                       super->IsFinal() ? "declared final" : "an interface");
     return false;
   }
   if (!klass->CanAccess(super)) {
-    ThrowIllegalAccessError(klass.get(), "Superclass %s is inaccessible to class %s",
+    ThrowIllegalAccessError(klass.Get(), "Superclass %s is inaccessible to class %s",
                             PrettyDescriptor(super).c_str(),
-                            PrettyDescriptor(klass.get()).c_str());
+                            PrettyDescriptor(klass.Get()).c_str());
     return false;
   }
 
-  // Inherit kAccClassIsFinalizable from the superclass in case this class doesn't override finalize.
+  // Inherit kAccClassIsFinalizable from the superclass in case this
+  // class doesn't override finalize.
   if (super->IsFinalizable()) {
     klass->SetFinalizable();
   }
@@ -3524,9 +3496,9 @@
   }
   // Disallow custom direct subclasses of java.lang.ref.Reference.
   if (init_done_ && super == GetClassRoot(kJavaLangRefReference)) {
-    ThrowLinkageError(klass.get(),
+    ThrowLinkageError(klass.Get(),
                       "Class %s attempts to subclass java.lang.ref.Reference, which is not allowed",
-                      PrettyDescriptor(klass.get()).c_str());
+                      PrettyDescriptor(klass.Get()).c_str());
     return false;
   }
 
@@ -3541,13 +3513,13 @@
 }
 
 // Populate the class vtable and itable. Compute return type indices.
-bool ClassLinker::LinkMethods(const SirtRef<mirror::Class>& klass,
-                              const SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces) {
+bool ClassLinker::LinkMethods(Handle<mirror::Class> klass,
+                              Handle<mirror::ObjectArray<mirror::Class>> interfaces) {
   if (klass->IsInterface()) {
     // No vtable.
     size_t count = klass->NumVirtualMethods();
     if (!IsUint(16, count)) {
-      ThrowClassFormatError(klass.get(), "Too many methods on interface: %zd", count);
+      ThrowClassFormatError(klass.Get(), "Too many methods on interface: %zd", count);
       return false;
     }
     for (size_t i = 0; i < count; ++i) {
@@ -3562,16 +3534,18 @@
   return true;
 }
 
-bool ClassLinker::LinkVirtualMethods(const SirtRef<mirror::Class>& klass) {
+bool ClassLinker::LinkVirtualMethods(Handle<mirror::Class> klass) {
   Thread* self = Thread::Current();
   if (klass->HasSuperClass()) {
-    uint32_t max_count = klass->NumVirtualMethods() + klass->GetSuperClass()->GetVTable()->GetLength();
+    uint32_t max_count = (klass->NumVirtualMethods() +
+                          klass->GetSuperClass()->GetVTable()->GetLength());
     size_t actual_count = klass->GetSuperClass()->GetVTable()->GetLength();
     CHECK_LE(actual_count, max_count);
     // TODO: do not assign to the vtable field until it is fully constructed.
-    SirtRef<mirror::ObjectArray<mirror::ArtMethod> >
-      vtable(self, klass->GetSuperClass()->GetVTable()->CopyOf(self, max_count));
-    if (UNLIKELY(vtable.get() == NULL)) {
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ObjectArray<mirror::ArtMethod>> vtable(
+        hs.NewHandle(klass->GetSuperClass()->GetVTable()->CopyOf(self, max_count)));
+    if (UNLIKELY(vtable.Get() == NULL)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return false;
     }
@@ -3584,9 +3558,10 @@
         mirror::ArtMethod* super_method = vtable->Get(j);
         MethodHelper super_mh(super_method);
         if (local_mh.HasSameNameAndSignature(&super_mh)) {
-          if (klass->CanAccessMember(super_method->GetDeclaringClass(), super_method->GetAccessFlags())) {
+          if (klass->CanAccessMember(super_method->GetDeclaringClass(),
+                                     super_method->GetAccessFlags())) {
             if (super_method->IsFinal()) {
-              ThrowLinkageError(klass.get(), "Method %s overrides final method in class %s",
+              ThrowLinkageError(klass.Get(), "Method %s overrides final method in class %s",
                                 PrettyMethod(local_method).c_str(),
                                 super_mh.GetDeclaringClassDescriptor());
               return false;
@@ -3609,29 +3584,30 @@
       }
     }
     if (!IsUint(16, actual_count)) {
-      ThrowClassFormatError(klass.get(), "Too many methods defined on class: %zd", actual_count);
+      ThrowClassFormatError(klass.Get(), "Too many methods defined on class: %zd", actual_count);
       return false;
     }
     // Shrink vtable if possible
     CHECK_LE(actual_count, max_count);
     if (actual_count < max_count) {
-      vtable.reset(vtable->CopyOf(self, actual_count));
-      if (UNLIKELY(vtable.get() == NULL)) {
+      vtable.Assign(vtable->CopyOf(self, actual_count));
+      if (UNLIKELY(vtable.Get() == NULL)) {
         CHECK(self->IsExceptionPending());  // OOME.
         return false;
       }
     }
-    klass->SetVTable(vtable.get());
+    klass->SetVTable(vtable.Get());
   } else {
-    CHECK(klass.get() == GetClassRoot(kJavaLangObject));
+    CHECK(klass.Get() == GetClassRoot(kJavaLangObject));
     uint32_t num_virtual_methods = klass->NumVirtualMethods();
     if (!IsUint(16, num_virtual_methods)) {
-      ThrowClassFormatError(klass.get(), "Too many methods: %d", num_virtual_methods);
+      ThrowClassFormatError(klass.Get(), "Too many methods: %d", num_virtual_methods);
       return false;
     }
-    SirtRef<mirror::ObjectArray<mirror::ArtMethod> >
-        vtable(self, AllocArtMethodArray(self, num_virtual_methods));
-    if (UNLIKELY(vtable.get() == NULL)) {
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ObjectArray<mirror::ArtMethod>>
+        vtable(hs.NewHandle(AllocArtMethodArray(self, num_virtual_methods)));
+    if (UNLIKELY(vtable.Get() == NULL)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return false;
     }
@@ -3640,13 +3616,14 @@
       vtable->Set<false>(i, virtual_method);
       virtual_method->SetMethodIndex(i & 0xFFFF);
     }
-    klass->SetVTable(vtable.get());
+    klass->SetVTable(vtable.Get());
   }
   return true;
 }
 
-bool ClassLinker::LinkInterfaceMethods(const SirtRef<mirror::Class>& klass,
-                                       const SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces) {
+bool ClassLinker::LinkInterfaceMethods(Handle<mirror::Class> klass,
+                                       Handle<mirror::ObjectArray<mirror::Class>> interfaces) {
+  Thread* const self = Thread::Current();
   // Set the imt table to be all conflicts by default.
   klass->SetImTable(Runtime::Current()->GetDefaultImt());
   size_t super_ifcount;
@@ -3655,18 +3632,14 @@
   } else {
     super_ifcount = 0;
   }
-  size_t ifcount = super_ifcount;
-  uint32_t num_interfaces;
-  {
-    ClassHelper kh(klass.get());
-    num_interfaces =
-        interfaces.get() == nullptr ? kh.NumDirectInterfaces() : interfaces->GetLength();
-    ifcount += num_interfaces;
-    for (size_t i = 0; i < num_interfaces; i++) {
-      mirror::Class* interface =
-          interfaces.get() == nullptr ? kh.GetDirectInterface(i) : interfaces->Get(i);
-      ifcount += interface->GetIfTableCount();
-    }
+  uint32_t num_interfaces =
+      interfaces.Get() == nullptr ? klass->NumDirectInterfaces() : interfaces->GetLength();
+  size_t ifcount = super_ifcount + num_interfaces;
+  for (size_t i = 0; i < num_interfaces; i++) {
+    mirror::Class* interface =
+        interfaces.Get() == nullptr ? mirror::Class::GetDirectInterface(self, klass, i) :
+            interfaces->Get(i);
+    ifcount += interface->GetIfTableCount();
   }
   if (ifcount == 0) {
     // Class implements no interfaces.
@@ -3690,9 +3663,9 @@
       return true;
     }
   }
-  Thread* self = Thread::Current();
-  SirtRef<mirror::IfTable> iftable(self, AllocIfTable(self, ifcount));
-  if (UNLIKELY(iftable.get() == NULL)) {
+  StackHandleScope<2> hs(self);
+  Handle<mirror::IfTable> iftable(hs.NewHandle(AllocIfTable(self, ifcount)));
+  if (UNLIKELY(iftable.Get() == NULL)) {
     CHECK(self->IsExceptionPending());  // OOME.
     return false;
   }
@@ -3706,15 +3679,14 @@
   // Flatten the interface inheritance hierarchy.
   size_t idx = super_ifcount;
   for (size_t i = 0; i < num_interfaces; i++) {
-    ClassHelper kh(klass.get());
     mirror::Class* interface =
-        interfaces.get() == nullptr ? kh.GetDirectInterface(i) : interfaces->Get(i);
+        interfaces.Get() == nullptr ? mirror::Class::GetDirectInterface(self, klass, i) :
+            interfaces->Get(i);
     DCHECK(interface != NULL);
     if (!interface->IsInterface()) {
-      ClassHelper ih(interface);
-      ThrowIncompatibleClassChangeError(klass.get(), "Class %s implements non-interface class %s",
-                                        PrettyDescriptor(klass.get()).c_str(),
-                                        PrettyDescriptor(ih.GetDescriptor()).c_str());
+      ThrowIncompatibleClassChangeError(klass.Get(), "Class %s implements non-interface class %s",
+                                        PrettyDescriptor(klass.Get()).c_str(),
+                                        PrettyDescriptor(interface->GetDescriptor()).c_str());
       return false;
     }
     // Check if interface is already in iftable
@@ -3748,8 +3720,8 @@
   }
   // Shrink iftable in case duplicates were found
   if (idx < ifcount) {
-    iftable.reset(down_cast<mirror::IfTable*>(iftable->CopyOf(self, idx * mirror::IfTable::kMax)));
-    if (UNLIKELY(iftable.get() == NULL)) {
+    iftable.Assign(down_cast<mirror::IfTable*>(iftable->CopyOf(self, idx * mirror::IfTable::kMax)));
+    if (UNLIKELY(iftable.Get() == NULL)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return false;
     }
@@ -3757,7 +3729,7 @@
   } else {
     CHECK_EQ(idx, ifcount);
   }
-  klass->SetIfTable(iftable.get());
+  klass->SetIfTable(iftable.Get());
 
   // If we're an interface, we don't need the vtable pointers, so we're done.
   if (klass->IsInterface()) {
@@ -3765,8 +3737,9 @@
   }
   // Allocate imtable
   bool imtable_changed = false;
-  SirtRef<mirror::ObjectArray<mirror::ArtMethod> > imtable(self, AllocArtMethodArray(self, kImtSize));
-  if (UNLIKELY(imtable.get() == NULL)) {
+  Handle<mirror::ObjectArray<mirror::ArtMethod>> imtable(
+      hs.NewHandle(AllocArtMethodArray(self, kImtSize)));
+  if (UNLIKELY(imtable.Get() == NULL)) {
     CHECK(self->IsExceptionPending());  // OOME.
     return false;
   }
@@ -3774,15 +3747,16 @@
   for (size_t i = 0; i < ifcount; ++i) {
     size_t num_methods = iftable->GetInterface(i)->NumVirtualMethods();
     if (num_methods > 0) {
-      SirtRef<mirror::ObjectArray<mirror::ArtMethod> >
-          method_array(self, AllocArtMethodArray(self, num_methods));
-      if (UNLIKELY(method_array.get() == nullptr)) {
+      StackHandleScope<2> hs(self);
+      Handle<mirror::ObjectArray<mirror::ArtMethod>>
+          method_array(hs.NewHandle(AllocArtMethodArray(self, num_methods)));
+      if (UNLIKELY(method_array.Get() == nullptr)) {
         CHECK(self->IsExceptionPending());  // OOME.
         return false;
       }
-      iftable->SetMethodArray(i, method_array.get());
-      SirtRef<mirror::ObjectArray<mirror::ArtMethod> > vtable(self,
-                                                              klass->GetVTableDuringLinking());
+      iftable->SetMethodArray(i, method_array.Get());
+      Handle<mirror::ObjectArray<mirror::ArtMethod>> vtable(
+          hs.NewHandle(klass->GetVTableDuringLinking()));
       for (size_t j = 0; j < num_methods; ++j) {
         mirror::ArtMethod* interface_method = iftable->GetInterface(i)->GetVirtualMethod(j);
         MethodHelper interface_mh(interface_method);
@@ -3800,10 +3774,11 @@
           MethodHelper vtable_mh(vtable_method);
           if (interface_mh.HasSameNameAndSignature(&vtable_mh)) {
             if (!vtable_method->IsAbstract() && !vtable_method->IsPublic()) {
-              ThrowIllegalAccessError(klass.get(),
-                                      "Method '%s' implementing interface method '%s' is not public",
-                                      PrettyMethod(vtable_method).c_str(),
-                                      PrettyMethod(interface_method).c_str());
+              ThrowIllegalAccessError(
+                  klass.Get(),
+                  "Method '%s' implementing interface method '%s' is not public",
+                  PrettyMethod(vtable_method).c_str(),
+                  PrettyMethod(interface_method).c_str());
               return false;
             }
             method_array->Set<false>(j, vtable_method);
@@ -3819,26 +3794,27 @@
           }
         }
         if (k < 0) {
-          SirtRef<mirror::ArtMethod> miranda_method(self, NULL);
+          StackHandleScope<1> hs(self);
+          auto miranda_method = hs.NewHandle<mirror::ArtMethod>(nullptr);
           for (size_t mir = 0; mir < miranda_list.size(); mir++) {
             mirror::ArtMethod* mir_method = miranda_list[mir];
             MethodHelper vtable_mh(mir_method);
             if (interface_mh.HasSameNameAndSignature(&vtable_mh)) {
-              miranda_method.reset(miranda_list[mir]);
+              miranda_method.Assign(miranda_list[mir]);
               break;
             }
           }
-          if (miranda_method.get() == NULL) {
+          if (miranda_method.Get() == NULL) {
             // Point the interface table at a phantom slot.
-            miranda_method.reset(down_cast<mirror::ArtMethod*>(interface_method->Clone(self)));
-            if (UNLIKELY(miranda_method.get() == NULL)) {
+            miranda_method.Assign(down_cast<mirror::ArtMethod*>(interface_method->Clone(self)));
+            if (UNLIKELY(miranda_method.Get() == NULL)) {
               CHECK(self->IsExceptionPending());  // OOME.
               return false;
             }
             // TODO: If methods move, then the miranda_list may hold stale references.
-            miranda_list.push_back(miranda_method.get());
+            miranda_list.push_back(miranda_method.Get());
           }
-          method_array->Set<false>(j, miranda_method.get());
+          method_array->Set<false>(j, miranda_method.Get());
         }
       }
     }
@@ -3851,7 +3827,7 @@
         imtable->Set<false>(i, imt_conflict_method);
       }
     }
-    klass->SetImTable(imtable.get());
+    klass->SetImTable(imtable.Get());
   }
   if (!miranda_list.empty()) {
     int old_method_count = klass->NumVirtualMethods();
@@ -3868,13 +3844,14 @@
     }
     klass->SetVirtualMethods(virtuals);
 
-    SirtRef<mirror::ObjectArray<mirror::ArtMethod> >
-        vtable(self, klass->GetVTableDuringLinking());
-    CHECK(vtable.get() != NULL);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ObjectArray<mirror::ArtMethod>> vtable(
+        hs.NewHandle(klass->GetVTableDuringLinking()));
+    CHECK(vtable.Get() != NULL);
     int old_vtable_count = vtable->GetLength();
     int new_vtable_count = old_vtable_count + miranda_list.size();
-    vtable.reset(vtable->CopyOf(self, new_vtable_count));
-    if (UNLIKELY(vtable.get() == NULL)) {
+    vtable.Assign(vtable->CopyOf(self, new_vtable_count));
+    if (UNLIKELY(vtable.Get() == NULL)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return false;
     }
@@ -3887,7 +3864,7 @@
       vtable->Set<false>(old_vtable_count + i, method);
     }
     // TODO: do not assign to the vtable field until it is fully constructed.
-    klass->SetVTable(vtable.get());
+    klass->SetVTable(vtable.Get());
   }
 
   mirror::ObjectArray<mirror::ArtMethod>* vtable = klass->GetVTableDuringLinking();
@@ -3900,13 +3877,13 @@
   return true;
 }
 
-bool ClassLinker::LinkInstanceFields(const SirtRef<mirror::Class>& klass) {
-  CHECK(klass.get() != NULL);
+bool ClassLinker::LinkInstanceFields(Handle<mirror::Class> klass) {
+  CHECK(klass.Get() != NULL);
   return LinkFields(klass, false);
 }
 
-bool ClassLinker::LinkStaticFields(const SirtRef<mirror::Class>& klass) {
-  CHECK(klass.get() != NULL);
+bool ClassLinker::LinkStaticFields(Handle<mirror::Class> klass) {
+  CHECK(klass.Get() != NULL);
   size_t allocated_class_size = klass->GetClassSize();
   bool success = LinkFields(klass, true);
   CHECK_EQ(allocated_class_size, klass->GetClassSize());
@@ -3927,8 +3904,10 @@
     if (type1 != type2) {
       bool is_primitive1 = type1 != Primitive::kPrimNot;
       bool is_primitive2 = type2 != Primitive::kPrimNot;
-      bool is64bit1 = is_primitive1 && (type1 == Primitive::kPrimLong || type1 == Primitive::kPrimDouble);
-      bool is64bit2 = is_primitive2 && (type2 == Primitive::kPrimLong || type2 == Primitive::kPrimDouble);
+      bool is64bit1 = is_primitive1 && (type1 == Primitive::kPrimLong ||
+                                        type1 == Primitive::kPrimDouble);
+      bool is64bit2 = is_primitive2 && (type2 == Primitive::kPrimLong ||
+                                        type2 == Primitive::kPrimDouble);
       int order1 = !is_primitive1 ? 0 : (is64bit1 ? 1 : 2);
       int order2 = !is_primitive2 ? 0 : (is64bit2 ? 1 : 2);
       if (order1 != order2) {
@@ -3942,7 +3921,7 @@
   }
 };
 
-bool ClassLinker::LinkFields(const SirtRef<mirror::Class>& klass, bool is_static) {
+bool ClassLinker::LinkFields(Handle<mirror::Class> klass, bool is_static) {
   size_t num_fields =
       is_static ? klass->NumStaticFields() : klass->NumInstanceFields();
 
@@ -3958,20 +3937,21 @@
   } else {
     mirror::Class* super_class = klass->GetSuperClass();
     if (super_class != NULL) {
-      CHECK(super_class->IsResolved());
+      CHECK(super_class->IsResolved())
+          << PrettyClass(klass.Get()) << " " << PrettyClass(super_class);
       field_offset = MemberOffset(super_class->GetObjectSize());
     }
     size = field_offset.Uint32Value();
   }
 
-  CHECK_EQ(num_fields == 0, fields == NULL);
+  CHECK_EQ(num_fields == 0, fields == NULL) << PrettyClass(klass.Get());
 
   // we want a relatively stable order so that adding new fields
   // minimizes disruption of C++ version such as Class and Method.
   std::deque<mirror::ArtField*> grouped_and_sorted_fields;
   for (size_t i = 0; i < num_fields; i++) {
     mirror::ArtField* f = fields->Get(i);
-    CHECK(f != NULL);
+    CHECK(f != NULL) << PrettyClass(klass.Get());
     grouped_and_sorted_fields.push_back(f);
   }
   std::sort(grouped_and_sorted_fields.begin(), grouped_and_sorted_fields.end(),
@@ -4003,7 +3983,7 @@
       mirror::ArtField* field = grouped_and_sorted_fields[i];
       FieldHelper fh(field);
       Primitive::Type type = fh.GetTypeAsPrimitiveType();
-      CHECK(type != Primitive::kPrimNot);  // should only be working on primitive types
+      CHECK(type != Primitive::kPrimNot) << PrettyField(field);  // should be primitive types
       if (type == Primitive::kPrimLong || type == Primitive::kPrimDouble) {
         continue;
       }
@@ -4019,13 +3999,14 @@
 
   // Alignment is good, shuffle any double-wide fields forward, and
   // finish assigning field offsets to all fields.
-  DCHECK(current_field == num_fields || IsAligned<8>(field_offset.Uint32Value()));
+  DCHECK(current_field == num_fields || IsAligned<8>(field_offset.Uint32Value()))
+      << PrettyClass(klass.Get());
   while (!grouped_and_sorted_fields.empty()) {
     mirror::ArtField* field = grouped_and_sorted_fields.front();
     grouped_and_sorted_fields.pop_front();
     FieldHelper fh(field);
     Primitive::Type type = fh.GetTypeAsPrimitiveType();
-    CHECK(type != Primitive::kPrimNot);  // should only be working on primitive types
+    CHECK(type != Primitive::kPrimNot) << PrettyField(field);  // should be primitive types
     fields->Set<false>(current_field, field);
     field->SetOffset(field_offset);
     field_offset = MemberOffset(field_offset.Uint32Value() +
@@ -4036,13 +4017,12 @@
   }
 
   // We lie to the GC about the java.lang.ref.Reference.referent field, so it doesn't scan it.
-  if (!is_static &&
-      (strcmp("Ljava/lang/ref/Reference;", ClassHelper(klass.get()).GetDescriptor()) == 0)) {
+  if (!is_static && klass->DescriptorEquals("Ljava/lang/ref/Reference;")) {
     // We know there are no non-reference fields in the Reference classes, and we know
     // that 'referent' is alphabetically last, so this is easy...
-    CHECK_EQ(num_reference_fields, num_fields);
+    CHECK_EQ(num_reference_fields, num_fields) << PrettyClass(klass.Get());
     FieldHelper fh(fields->Get(num_fields - 1));
-    CHECK_STREQ(fh.GetName(), "referent");
+    CHECK_STREQ(fh.GetName(), "referent") << PrettyClass(klass.Get());
     --num_reference_fields;
   }
 
@@ -4054,7 +4034,7 @@
       mirror::ArtField* field = fields->Get(i);
       if (false) {  // enable to debug field layout
         LOG(INFO) << "LinkFields: " << (is_static ? "static" : "instance")
-                    << " class=" << PrettyClass(klass.get())
+                    << " class=" << PrettyClass(klass.Get())
                     << " field=" << PrettyField(field)
                     << " offset="
                     << field->GetField32(MemberOffset(mirror::ArtField::OffsetOffset()));
@@ -4062,21 +4042,21 @@
       FieldHelper fh(field);
       Primitive::Type type = fh.GetTypeAsPrimitiveType();
       bool is_primitive = type != Primitive::kPrimNot;
-      if ((strcmp("Ljava/lang/ref/Reference;", ClassHelper(klass.get()).GetDescriptor()) == 0)
-          && (strcmp("referent", fh.GetName()) == 0)) {
+      if (klass->DescriptorEquals("Ljava/lang/ref/Reference;") &&
+          strcmp("referent", fh.GetName()) == 0) {
         is_primitive = true;  // We lied above, so we have to expect a lie here.
       }
       if (is_primitive) {
         if (!seen_non_ref) {
           seen_non_ref = true;
-          DCHECK_EQ(num_reference_fields, i);
+          DCHECK_EQ(num_reference_fields, i) << PrettyField(field);
         }
       } else {
-        DCHECK(!seen_non_ref);
+        DCHECK(!seen_non_ref) << PrettyField(field);
       }
     }
     if (!seen_non_ref) {
-      DCHECK_EQ(num_fields, num_reference_fields);
+      DCHECK_EQ(num_fields, num_reference_fields) << PrettyClass(klass.Get());
     }
   }
   size = field_offset.Uint32Value();
@@ -4087,11 +4067,11 @@
   } else {
     klass->SetNumReferenceInstanceFields(num_reference_fields);
     if (!klass->IsVariableSize()) {
-      DCHECK_GE(size, sizeof(mirror::Object)) << ClassHelper(klass.get()).GetDescriptor();
+      DCHECK_GE(size, sizeof(mirror::Object)) << klass->GetDescriptor();
       size_t previous_size = klass->GetObjectSize();
       if (previous_size != 0) {
         // Make sure that we didn't originally have an incorrect size.
-        CHECK_EQ(previous_size, size);
+        CHECK_EQ(previous_size, size) << klass->GetDescriptor();
       }
       klass->SetObjectSize(size);
     }
@@ -4101,7 +4081,7 @@
 
 //  Set the bitmap of reference offsets, refOffsets, from the ifields
 //  list.
-void ClassLinker::CreateReferenceInstanceOffsets(const SirtRef<mirror::Class>& klass) {
+void ClassLinker::CreateReferenceInstanceOffsets(Handle<mirror::Class> klass) {
   uint32_t reference_offsets = 0;
   mirror::Class* super_class = klass->GetSuperClass();
   if (super_class != NULL) {
@@ -4115,11 +4095,11 @@
   CreateReferenceOffsets(klass, false, reference_offsets);
 }
 
-void ClassLinker::CreateReferenceStaticOffsets(const SirtRef<mirror::Class>& klass) {
+void ClassLinker::CreateReferenceStaticOffsets(Handle<mirror::Class> klass) {
   CreateReferenceOffsets(klass, true, 0);
 }
 
-void ClassLinker::CreateReferenceOffsets(const SirtRef<mirror::Class>& klass, bool is_static,
+void ClassLinker::CreateReferenceOffsets(Handle<mirror::Class> klass, bool is_static,
                                          uint32_t reference_offsets) {
   size_t num_reference_fields =
       is_static ? klass->NumReferenceStaticFieldsDuringLinking()
@@ -4152,8 +4132,8 @@
 }
 
 mirror::String* ClassLinker::ResolveString(const DexFile& dex_file, uint32_t string_idx,
-                                           const SirtRef<mirror::DexCache>& dex_cache) {
-  DCHECK(dex_cache.get() != nullptr);
+                                           Handle<mirror::DexCache> dex_cache) {
+  DCHECK(dex_cache.Get() != nullptr);
   mirror::String* resolved = dex_cache->GetResolvedString(string_idx);
   if (resolved != NULL) {
     return resolved;
@@ -4167,16 +4147,16 @@
 
 mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file, uint16_t type_idx,
                                         mirror::Class* referrer) {
-  Thread* self = Thread::Current();
-  SirtRef<mirror::DexCache> dex_cache(self, referrer->GetDexCache());
-  SirtRef<mirror::ClassLoader> class_loader(self, referrer->GetClassLoader());
+  StackHandleScope<2> hs(Thread::Current());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(referrer->GetClassLoader()));
   return ResolveType(dex_file, type_idx, dex_cache, class_loader);
 }
 
 mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file, uint16_t type_idx,
-                                        const SirtRef<mirror::DexCache>& dex_cache,
-                                        const SirtRef<mirror::ClassLoader>& class_loader) {
-  DCHECK(dex_cache.get() != NULL);
+                                        Handle<mirror::DexCache> dex_cache,
+                                        Handle<mirror::ClassLoader> class_loader) {
+  DCHECK(dex_cache.Get() != NULL);
   mirror::Class* resolved = dex_cache->GetResolvedType(type_idx);
   if (resolved == NULL) {
     Thread* self = Thread::Current();
@@ -4191,12 +4171,13 @@
       CHECK(self->IsExceptionPending())
           << "Expected pending exception for failed resolution of: " << descriptor;
       // Convert a ClassNotFoundException to a NoClassDefFoundError.
-      SirtRef<mirror::Throwable> cause(self, self->GetException(NULL));
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Throwable> cause(hs.NewHandle(self->GetException(nullptr)));
       if (cause->InstanceOf(GetClassRoot(kJavaLangClassNotFoundException))) {
-        DCHECK(resolved == NULL);  // No SirtRef needed to preserve resolved.
+        DCHECK(resolved == NULL);  // No Handle needed to preserve resolved.
         self->ClearException();
         ThrowNoClassDefFoundError("Failed resolution of: %s", descriptor);
-        self->GetException(NULL)->SetCause(cause.get());
+        self->GetException(NULL)->SetCause(cause.Get());
       }
     }
   }
@@ -4205,16 +4186,15 @@
   return resolved;
 }
 
-mirror::ArtMethod* ClassLinker::ResolveMethod(const DexFile& dex_file,
-                                              uint32_t method_idx,
-                                              const SirtRef<mirror::DexCache>& dex_cache,
-                                              const SirtRef<mirror::ClassLoader>& class_loader,
-                                              mirror::ArtMethod* referrer,
+mirror::ArtMethod* ClassLinker::ResolveMethod(const DexFile& dex_file, uint32_t method_idx,
+                                              Handle<mirror::DexCache> dex_cache,
+                                              Handle<mirror::ClassLoader> class_loader,
+                                              Handle<mirror::ArtMethod> referrer,
                                               InvokeType type) {
-  DCHECK(dex_cache.get() != NULL);
+  DCHECK(dex_cache.Get() != NULL);
   // Check for hit in the dex cache.
   mirror::ArtMethod* resolved = dex_cache->GetResolvedMethod(method_idx);
-  if (resolved != NULL && !resolved->IsRuntimeMethod()) {
+  if (resolved != nullptr && !resolved->IsRuntimeMethod()) {
     return resolved;
   }
   // Fail, get the declaring class.
@@ -4229,15 +4209,15 @@
   switch (type) {
     case kDirect:  // Fall-through.
     case kStatic:
-      resolved = klass->FindDirectMethod(dex_cache.get(), method_idx);
+      resolved = klass->FindDirectMethod(dex_cache.Get(), method_idx);
       break;
     case kInterface:
-      resolved = klass->FindInterfaceMethod(dex_cache.get(), method_idx);
+      resolved = klass->FindInterfaceMethod(dex_cache.Get(), method_idx);
       DCHECK(resolved == NULL || resolved->GetDeclaringClass()->IsInterface());
       break;
     case kSuper:  // Fall-through.
     case kVirtual:
-      resolved = klass->FindVirtualMethod(dex_cache.get(), method_idx);
+      resolved = klass->FindVirtualMethod(dex_cache.Get(), method_idx);
       break;
     default:
       LOG(FATAL) << "Unreachable - invocation type: " << type;
@@ -4289,7 +4269,7 @@
     }
 
     // If we found something, check that it can be accessed by the referrer.
-    if (resolved != NULL && referrer != NULL) {
+    if (resolved != NULL && referrer.Get() != NULL) {
       mirror::Class* methods_class = resolved->GetDeclaringClass();
       mirror::Class* referring_class = referrer->GetDeclaringClass();
       if (!referring_class->CanAccess(methods_class)) {
@@ -4309,11 +4289,11 @@
       case kDirect:
       case kStatic:
         if (resolved != NULL) {
-          ThrowIncompatibleClassChangeError(type, kVirtual, resolved, referrer);
+          ThrowIncompatibleClassChangeError(type, kVirtual, resolved, referrer.Get());
         } else {
           resolved = klass->FindInterfaceMethod(name, signature);
           if (resolved != NULL) {
-            ThrowIncompatibleClassChangeError(type, kInterface, resolved, referrer);
+            ThrowIncompatibleClassChangeError(type, kInterface, resolved, referrer.Get());
           } else {
             ThrowNoSuchMethodError(type, klass, name, signature);
           }
@@ -4321,11 +4301,11 @@
         break;
       case kInterface:
         if (resolved != NULL) {
-          ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer);
+          ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer.Get());
         } else {
           resolved = klass->FindVirtualMethod(name, signature);
           if (resolved != NULL) {
-            ThrowIncompatibleClassChangeError(type, kVirtual, resolved, referrer);
+            ThrowIncompatibleClassChangeError(type, kVirtual, resolved, referrer.Get());
           } else {
             ThrowNoSuchMethodError(type, klass, name, signature);
           }
@@ -4336,11 +4316,11 @@
         break;
       case kVirtual:
         if (resolved != NULL) {
-          ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer);
+          ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer.Get());
         } else {
           resolved = klass->FindInterfaceMethod(name, signature);
           if (resolved != NULL) {
-            ThrowIncompatibleClassChangeError(type, kInterface, resolved, referrer);
+            ThrowIncompatibleClassChangeError(type, kInterface, resolved, referrer.Get());
           } else {
             ThrowNoSuchMethodError(type, klass, name, signature);
           }
@@ -4353,37 +4333,40 @@
 }
 
 mirror::ArtField* ClassLinker::ResolveField(const DexFile& dex_file, uint32_t field_idx,
-                                            const SirtRef<mirror::DexCache>& dex_cache,
-                                            const SirtRef<mirror::ClassLoader>& class_loader,
+                                            Handle<mirror::DexCache> dex_cache,
+                                            Handle<mirror::ClassLoader> class_loader,
                                             bool is_static) {
-  DCHECK(dex_cache.get() != nullptr);
+  DCHECK(dex_cache.Get() != nullptr);
   mirror::ArtField* resolved = dex_cache->GetResolvedField(field_idx);
   if (resolved != NULL) {
     return resolved;
   }
   const DexFile::FieldId& field_id = dex_file.GetFieldId(field_idx);
-  mirror::Class* klass = ResolveType(dex_file, field_id.class_idx_, dex_cache, class_loader);
-  if (klass == NULL) {
+  Thread* const self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Class> klass(
+      hs.NewHandle(ResolveType(dex_file, field_id.class_idx_, dex_cache, class_loader)));
+  if (klass.Get() == NULL) {
     DCHECK(Thread::Current()->IsExceptionPending());
     return NULL;
   }
 
   if (is_static) {
-    resolved = klass->FindStaticField(dex_cache.get(), field_idx);
+    resolved = mirror::Class::FindStaticField(self, klass, dex_cache.Get(), field_idx);
   } else {
-    resolved = klass->FindInstanceField(dex_cache.get(), field_idx);
+    resolved = klass->FindInstanceField(dex_cache.Get(), field_idx);
   }
 
   if (resolved == NULL) {
     const char* name = dex_file.GetFieldName(field_id);
     const char* type = dex_file.GetFieldTypeDescriptor(field_id);
     if (is_static) {
-      resolved = klass->FindStaticField(name, type);
+      resolved = mirror::Class::FindStaticField(self, klass, name, type);
     } else {
       resolved = klass->FindInstanceField(name, type);
     }
     if (resolved == NULL) {
-      ThrowNoSuchFieldError(is_static ? "static " : "instance ", klass, type, name);
+      ThrowNoSuchFieldError(is_static ? "static " : "instance ", klass.Get(), type, name);
       return NULL;
     }
   }
@@ -4393,16 +4376,19 @@
 
 mirror::ArtField* ClassLinker::ResolveFieldJLS(const DexFile& dex_file,
                                                uint32_t field_idx,
-                                               const SirtRef<mirror::DexCache>& dex_cache,
-                                               const SirtRef<mirror::ClassLoader>& class_loader) {
-  DCHECK(dex_cache.get() != nullptr);
+                                               Handle<mirror::DexCache> dex_cache,
+                                               Handle<mirror::ClassLoader> class_loader) {
+  DCHECK(dex_cache.Get() != nullptr);
   mirror::ArtField* resolved = dex_cache->GetResolvedField(field_idx);
   if (resolved != NULL) {
     return resolved;
   }
   const DexFile::FieldId& field_id = dex_file.GetFieldId(field_idx);
-  mirror::Class* klass = ResolveType(dex_file, field_id.class_idx_, dex_cache, class_loader);
-  if (klass == NULL) {
+  Thread* self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Class> klass(
+      hs.NewHandle(ResolveType(dex_file, field_id.class_idx_, dex_cache, class_loader)));
+  if (klass.Get() == NULL) {
     DCHECK(Thread::Current()->IsExceptionPending());
     return NULL;
   }
@@ -4410,11 +4396,11 @@
   StringPiece name(dex_file.StringDataByIdx(field_id.name_idx_));
   StringPiece type(dex_file.StringDataByIdx(
       dex_file.GetTypeId(field_id.type_idx_).descriptor_idx_));
-  resolved = klass->FindField(name, type);
+  resolved = mirror::Class::FindField(self, klass, name, type);
   if (resolved != NULL) {
     dex_cache->SetResolvedField(field_idx, resolved);
   } else {
-    ThrowNoSuchFieldError("", klass, type, name);
+    ThrowNoSuchFieldError("", klass.Get(), type, name);
   }
   return resolved;
 }
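
A minimal sketch of the StackHandleScope/Handle pattern that the hunks above (and the header changes below) convert to. The helper SketchGetSuper and its body are hypothetical illustrations, not code from this patch; StackHandleScope, Handle, NewHandle(), Get(), and Assign() are the APIs the patch adopts in place of SirtRef.

#include "handle_scope-inl.h"  // the include class_linker_test.cc switches to below

namespace art {

// Hypothetical helper showing the conversion pattern used throughout this change.
static mirror::Class* SketchGetSuper(Thread* self, mirror::Class* raw_klass)
    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
  // A StackHandleScope<N> reserves N GC-visible handle slots on the native stack;
  // it replaces the per-variable SirtRef wrappers the old code allocated one by one.
  StackHandleScope<1> hs(self);
  Handle<mirror::Class> klass(hs.NewHandle(raw_klass));
  if (klass.Get() == nullptr) {  // Get() is the Handle counterpart of SirtRef::get().
    return nullptr;
  }
  // Assign() re-points the handle, as SirtRef::reset() did; the handle keeps the
  // referenced object visible to a moving GC across any suspend points in between.
  klass.Assign(klass->GetSuperClass());
  return klass.Get();
}

}  // namespace art
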
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index a23add0..a8271ed 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -46,8 +46,8 @@
 
 class InternTable;
 template<class T> class ObjectLock;
-class ScopedObjectAccess;
-template<class T> class SirtRef;
+class ScopedObjectAccessAlreadyRunnable;
+template<class T> class Handle;
 
 typedef bool (ClassVisitor)(mirror::Class* c, void* arg);
 
@@ -75,7 +75,7 @@
   // Finds a class by its descriptor, loading it if necessary.
   // If class_loader is null, searches boot_class_path_.
   mirror::Class* FindClass(Thread* self, const char* descriptor,
-                           const SirtRef<mirror::ClassLoader>& class_loader)
+                           Handle<mirror::ClassLoader> class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Finds a class by its descriptor using the "system" class loader, ie by searching the
@@ -92,7 +92,7 @@
 
   // Define a new a class based on a ClassDef from a DexFile
   mirror::Class* DefineClass(const char* descriptor,
-                             const SirtRef<mirror::ClassLoader>& class_loader,
+                             Handle<mirror::ClassLoader> class_loader,
                              const DexFile& dex_file, const DexFile::ClassDef& dex_class_def)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -136,7 +136,7 @@
   // Resolve a String with the given index from the DexFile, storing the
   // result in the DexCache.
   mirror::String* ResolveString(const DexFile& dex_file, uint32_t string_idx,
-                                const SirtRef<mirror::DexCache>& dex_cache)
+                                Handle<mirror::DexCache> dex_cache)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a Type with the given index from the DexFile, storing the
@@ -159,8 +159,8 @@
   // type, since it may be referenced from but not contained within
   // the given DexFile.
   mirror::Class* ResolveType(const DexFile& dex_file, uint16_t type_idx,
-                             const SirtRef<mirror::DexCache>& dex_cache,
-                             const SirtRef<mirror::ClassLoader>& class_loader)
+                             Handle<mirror::DexCache> dex_cache,
+                             Handle<mirror::ClassLoader> class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a method with a given ID from the DexFile, storing the
@@ -170,16 +170,21 @@
   // virtual method.
   mirror::ArtMethod* ResolveMethod(const DexFile& dex_file,
                                    uint32_t method_idx,
-                                   const SirtRef<mirror::DexCache>& dex_cache,
-                                   const SirtRef<mirror::ClassLoader>& class_loader,
-                                   mirror::ArtMethod* referrer,
+                                   Handle<mirror::DexCache> dex_cache,
+                                   Handle<mirror::ClassLoader> class_loader,
+                                   Handle<mirror::ArtMethod> referrer,
                                    InvokeType type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::ArtMethod* ResolveMethod(uint32_t method_idx, mirror::ArtMethod* referrer,
+  mirror::ArtMethod* GetResolvedMethod(uint32_t method_idx, mirror::ArtMethod* referrer,
+                                       InvokeType type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::ArtMethod* ResolveMethod(Thread* self, uint32_t method_idx, mirror::ArtMethod** referrer,
                                    InvokeType type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  mirror::ArtField* GetResolvedField(uint32_t field_idx, mirror::Class* field_declaring_class)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   mirror::ArtField* ResolveField(uint32_t field_idx, mirror::ArtMethod* referrer,
                                  bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -191,8 +196,8 @@
   // field.
   mirror::ArtField* ResolveField(const DexFile& dex_file,
                                  uint32_t field_idx,
-                                 const SirtRef<mirror::DexCache>& dex_cache,
-                                 const SirtRef<mirror::ClassLoader>& class_loader,
+                                 Handle<mirror::DexCache> dex_cache,
+                                 Handle<mirror::ClassLoader> class_loader,
                                  bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -200,10 +205,9 @@
   // result in DexCache. The ClassLinker and ClassLoader are used as
   // in ResolveType. No is_static argument is provided so that Java
   // field resolution semantics are followed.
-  mirror::ArtField* ResolveFieldJLS(const DexFile& dex_file,
-                                    uint32_t field_idx,
-                                    const SirtRef<mirror::DexCache>& dex_cache,
-                                    const SirtRef<mirror::ClassLoader>& class_loader)
+  mirror::ArtField* ResolveFieldJLS(const DexFile& dex_file, uint32_t field_idx,
+                                    Handle<mirror::DexCache> dex_cache,
+                                    Handle<mirror::ClassLoader> class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Get shorty from method index without resolution. Used to do handlerization.
@@ -213,8 +217,7 @@
   // Returns true on success, false if there's an exception pending.
   // can_run_clinit=false allows the compiler to attempt to init a class,
   // given the restriction that no <clinit> execution is possible.
-  bool EnsureInitialized(const SirtRef<mirror::Class>& c,
-                         bool can_init_fields, bool can_init_parents)
+  bool EnsureInitialized(Handle<mirror::Class> c, bool can_init_fields, bool can_init_parents)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Initializes classes that have instances in the image but that have
@@ -224,7 +227,7 @@
   void RegisterDexFile(const DexFile& dex_file)
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void RegisterDexFile(const DexFile& dex_file, const SirtRef<mirror::DexCache>& dex_cache)
+  void RegisterDexFile(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -280,6 +283,7 @@
   // does not match the OatFile.
   const DexFile* FindDexFileInOatFileFromDexLocation(const char* location,
                                                      const uint32_t* const location_checksum,
+                                                     InstructionSet isa,
                                                      std::vector<std::string>* error_msgs)
       LOCKS_EXCLUDED(dex_lock_, Locks::mutator_lock_);
 
@@ -288,6 +292,7 @@
   static bool VerifyOatFileChecksums(const OatFile* oat_file,
                                      const char* dex_location,
                                      uint32_t dex_location_checksum,
+                                     InstructionSet instruction_set,
                                      std::string* error_msg);
 
   // TODO: replace this with multiple methods that allocate the correct managed type.
@@ -314,18 +319,19 @@
                                                                               size_t length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VerifyClass(const SirtRef<mirror::Class>& klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VerifyClass(Handle<mirror::Class> klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool VerifyClassUsingOatFile(const DexFile& dex_file, mirror::Class* klass,
                                mirror::Class::Status& oat_file_class_status)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void ResolveClassExceptionHandlerTypes(const DexFile& dex_file,
-                                         const SirtRef<mirror::Class>& klass)
+                                         Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void ResolveMethodExceptionHandlerTypes(const DexFile& dex_file, mirror::ArtMethod* klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::Class* CreateProxyClass(ScopedObjectAccess& soa, jstring name, jobjectArray interfaces,
-                                  jobject loader, jobjectArray methods, jobjectArray throws)
+  mirror::Class* CreateProxyClass(ScopedObjectAccessAlreadyRunnable& soa, jstring name,
+                                  jobjectArray interfaces, jobject loader, jobjectArray methods,
+                                  jobjectArray throws)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   std::string GetDescriptorForProxy(mirror::Class* proxy_class)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -354,8 +360,8 @@
   }
 
   const void* GetQuickGenericJniTrampoline() const {
-      return quick_generic_jni_trampoline_;
-    }
+    return quick_generic_jni_trampoline_;
+  }
 
   const void* GetQuickResolutionTrampoline() const {
     return quick_resolution_trampoline_;
@@ -417,12 +423,12 @@
 
 
   mirror::Class* CreateArrayClass(Thread* self, const char* descriptor,
-                                  const SirtRef<mirror::ClassLoader>& class_loader)
+                                  Handle<mirror::ClassLoader> class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void AppendToBootClassPath(const DexFile& dex_file)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void AppendToBootClassPath(const DexFile& dex_file, const SirtRef<mirror::DexCache>& dex_cache)
+  void AppendToBootClassPath(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void ConstructFieldMap(const DexFile& dex_file, const DexFile::ClassDef& dex_class_def,
@@ -434,23 +440,23 @@
 
   void LoadClass(const DexFile& dex_file,
                  const DexFile::ClassDef& dex_class_def,
-                 const SirtRef<mirror::Class>& klass,
+                 Handle<mirror::Class> klass,
                  mirror::ClassLoader* class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void LoadClassMembers(const DexFile& dex_file,
                         const byte* class_data,
-                        const SirtRef<mirror::Class>& klass,
+                        Handle<mirror::Class> klass,
                         mirror::ClassLoader* class_loader,
                         const OatFile::OatClass* oat_class)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void LoadField(const DexFile& dex_file, const ClassDataItemIterator& it,
-                 const SirtRef<mirror::Class>& klass, const SirtRef<mirror::ArtField>& dst)
+                 Handle<mirror::Class> klass, Handle<mirror::ArtField> dst)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::ArtMethod* LoadMethod(Thread* self, const DexFile& dex_file,
                                 const ClassDataItemIterator& dex_method,
-                                const SirtRef<mirror::Class>& klass)
+                                Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void FixupStaticTrampolines(mirror::Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -459,23 +465,23 @@
   OatFile::OatClass GetOatClass(const DexFile& dex_file, uint16_t class_def_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void RegisterDexFileLocked(const DexFile& dex_file, const SirtRef<mirror::DexCache>& dex_cache)
+  void RegisterDexFileLocked(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
       EXCLUSIVE_LOCKS_REQUIRED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsDexFileRegisteredLocked(const DexFile& dex_file) const
       SHARED_LOCKS_REQUIRED(dex_lock_, Locks::mutator_lock_);
 
-  bool InitializeClass(const SirtRef<mirror::Class>& klass, bool can_run_clinit,
+  bool InitializeClass(Handle<mirror::Class> klass, bool can_run_clinit,
                        bool can_init_parents)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool WaitForInitializeClass(const SirtRef<mirror::Class>& klass, Thread* self,
+  bool WaitForInitializeClass(Handle<mirror::Class> klass, Thread* self,
                               ObjectLock<mirror::Class>& lock);
-  bool ValidateSuperClassDescriptors(const SirtRef<mirror::Class>& klass)
+  bool ValidateSuperClassDescriptors(Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsSameDescriptorInDifferentClassContexts(Thread* self, const char* descriptor,
-                                                SirtRef<mirror::ClassLoader>& class_loader1,
-                                                SirtRef<mirror::ClassLoader>& class_loader2)
+                                                Handle<mirror::ClassLoader> class_loader1,
+                                                Handle<mirror::ClassLoader> class_loader2)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsSameMethodSignatureInDifferentClassContexts(Thread* self, mirror::ArtMethod* method,
@@ -483,40 +489,43 @@
                                                      mirror::Class* klass2)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkClass(Thread* self, const SirtRef<mirror::Class>& klass,
-                 const SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces)
+  bool LinkClass(Thread* self, Handle<mirror::Class> klass,
+                 Handle<mirror::ObjectArray<mirror::Class>> interfaces)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkSuperClass(const SirtRef<mirror::Class>& klass)
+  bool LinkSuperClass(Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LoadSuperAndInterfaces(const SirtRef<mirror::Class>& klass, const DexFile& dex_file)
+  bool LoadSuperAndInterfaces(Handle<mirror::Class> klass, const DexFile& dex_file)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkMethods(const SirtRef<mirror::Class>& klass,
-                   const SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces)
+  bool LinkMethods(Handle<mirror::Class> klass,
+                   Handle<mirror::ObjectArray<mirror::Class>> interfaces)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkVirtualMethods(const SirtRef<mirror::Class>& klass)
+  bool LinkVirtualMethods(Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkInterfaceMethods(const SirtRef<mirror::Class>& klass,
-                            const SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces)
+  bool LinkInterfaceMethods(Handle<mirror::Class> klass,
+                            Handle<mirror::ObjectArray<mirror::Class>> interfaces)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkStaticFields(const SirtRef<mirror::Class>& klass)
+  bool LinkStaticFields(Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool LinkInstanceFields(const SirtRef<mirror::Class>& klass)
+  bool LinkInstanceFields(Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool LinkFields(const SirtRef<mirror::Class>& klass, bool is_static)
+  bool LinkFields(Handle<mirror::Class> klass, bool is_static)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void LinkCode(Handle<mirror::ArtMethod> method, const OatFile::OatClass* oat_class,
+                const DexFile& dex_file, uint32_t dex_method_index, uint32_t method_index)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 
-  void CreateReferenceInstanceOffsets(const SirtRef<mirror::Class>& klass)
+  void CreateReferenceInstanceOffsets(Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void CreateReferenceStaticOffsets(const SirtRef<mirror::Class>& klass)
+  void CreateReferenceStaticOffsets(Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void CreateReferenceOffsets(const SirtRef<mirror::Class>& klass, bool is_static,
+  void CreateReferenceOffsets(Handle<mirror::Class> klass, bool is_static,
                               uint32_t reference_offsets)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -545,11 +554,11 @@
                                                  bool* open_failed)
       LOCKS_EXCLUDED(dex_lock_);
 
-  mirror::ArtMethod* CreateProxyConstructor(Thread* self, const SirtRef<mirror::Class>& klass,
+  mirror::ArtMethod* CreateProxyConstructor(Thread* self, Handle<mirror::Class> klass,
                                             mirror::Class* proxy_class)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::ArtMethod* CreateProxyMethod(Thread* self, const SirtRef<mirror::Class>& klass,
-                                       const SirtRef<mirror::ArtMethod>& prototype)
+  mirror::ArtMethod* CreateProxyMethod(Thread* self, Handle<mirror::Class> klass,
+                                       Handle<mirror::ArtMethod> prototype)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   std::vector<const DexFile*> boot_class_path_;
@@ -565,7 +574,7 @@
   // Class::descriptor_ and Class::class_loader_.
   typedef std::multimap<size_t, mirror::Class*> Table;
   Table class_table_ GUARDED_BY(Locks::classlinker_classes_lock_);
-  std::vector<std::pair<size_t, mirror::Class*> > new_class_roots_;
+  std::vector<std::pair<size_t, mirror::Class*>> new_class_roots_;
 
   // Do we need to search dex caches to find image classes?
   bool dex_cache_image_class_lookup_required_;
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 1218357..c11aecc 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -16,9 +16,9 @@
 
 #include "class_linker.h"
 
+#include <memory>
 #include <string>
 
-#include "UniquePtr.h"
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
@@ -34,7 +34,7 @@
 #include "mirror/proxy.h"
 #include "mirror/reference.h"
 #include "mirror/stack_trace_element.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 
 namespace art {
 
@@ -60,12 +60,11 @@
 
   void AssertPrimitiveClass(const std::string& descriptor, mirror::Class* primitive)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    ClassHelper primitive_ch(primitive);
     ASSERT_TRUE(primitive != NULL);
     ASSERT_TRUE(primitive->GetClass() != NULL);
     ASSERT_EQ(primitive->GetClass(), primitive->GetClass()->GetClass());
     EXPECT_TRUE(primitive->GetClass()->GetSuperClass() != NULL);
-    ASSERT_STREQ(descriptor.c_str(), primitive_ch.GetDescriptor());
+    ASSERT_STREQ(descriptor.c_str(), primitive->GetDescriptor().c_str());
     EXPECT_TRUE(primitive->GetSuperClass() == NULL);
     EXPECT_FALSE(primitive->HasSuperClass());
     EXPECT_TRUE(primitive->GetClassLoader() == NULL);
@@ -87,7 +86,7 @@
     EXPECT_EQ(0U, primitive->NumVirtualMethods());
     EXPECT_EQ(0U, primitive->NumInstanceFields());
     EXPECT_EQ(0U, primitive->NumStaticFields());
-    EXPECT_EQ(0U, primitive_ch.NumDirectInterfaces());
+    EXPECT_EQ(0U, primitive->NumDirectInterfaces());
     EXPECT_TRUE(primitive->GetVTable() == NULL);
     EXPECT_EQ(0, primitive->GetIfTableCount());
     EXPECT_TRUE(primitive->GetIfTable() == NULL);
@@ -99,31 +98,29 @@
                         mirror::ClassLoader* class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Thread* self = Thread::Current();
-    SirtRef<mirror::ClassLoader> loader(self, class_loader);
-    SirtRef<mirror::Class> array(self,
-                                 class_linker_->FindClass(self, array_descriptor.c_str(), loader));
-    ClassHelper array_component_ch(array->GetComponentType());
-    EXPECT_STREQ(component_type.c_str(), array_component_ch.GetDescriptor());
+    StackHandleScope<2> hs(self);
+    Handle<mirror::ClassLoader> loader(hs.NewHandle(class_loader));
+    Handle<mirror::Class> array(
+        hs.NewHandle(class_linker_->FindClass(self, array_descriptor.c_str(), loader)));
+    EXPECT_STREQ(component_type.c_str(), array->GetComponentType()->GetDescriptor().c_str());
     EXPECT_EQ(class_loader, array->GetClassLoader());
     EXPECT_EQ(kAccFinal | kAccAbstract, (array->GetAccessFlags() & (kAccFinal | kAccAbstract)));
     AssertArrayClass(array_descriptor, array);
   }
 
-  void AssertArrayClass(const std::string& array_descriptor, const SirtRef<mirror::Class>& array)
+  void AssertArrayClass(const std::string& array_descriptor, Handle<mirror::Class> array)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    ClassHelper kh(array.get());
-    ASSERT_TRUE(array.get() != NULL);
+    ASSERT_TRUE(array.Get() != NULL);
     ASSERT_TRUE(array->GetClass() != NULL);
     ASSERT_EQ(array->GetClass(), array->GetClass()->GetClass());
     EXPECT_TRUE(array->GetClass()->GetSuperClass() != NULL);
-    ASSERT_STREQ(array_descriptor.c_str(), kh.GetDescriptor());
+    ASSERT_STREQ(array_descriptor.c_str(), array->GetDescriptor().c_str());
     EXPECT_TRUE(array->GetSuperClass() != NULL);
     Thread* self = Thread::Current();
     EXPECT_EQ(class_linker_->FindSystemClass(self, "Ljava/lang/Object;"), array->GetSuperClass());
     EXPECT_TRUE(array->HasSuperClass());
     ASSERT_TRUE(array->GetComponentType() != NULL);
-    kh.ChangeClass(array->GetComponentType());
-    ASSERT_TRUE(kh.GetDescriptor() != NULL);
+    ASSERT_TRUE(!array->GetComponentType()->GetDescriptor().empty());
     EXPECT_EQ(mirror::Class::kStatusInitialized, array->GetStatus());
     EXPECT_FALSE(array->IsErroneous());
     EXPECT_TRUE(array->IsLoaded());
@@ -141,17 +138,16 @@
     EXPECT_EQ(0U, array->NumVirtualMethods());
     EXPECT_EQ(0U, array->NumInstanceFields());
     EXPECT_EQ(0U, array->NumStaticFields());
-    kh.ChangeClass(array.get());
-    EXPECT_EQ(2U, kh.NumDirectInterfaces());
+    EXPECT_EQ(2U, array->NumDirectInterfaces());
     EXPECT_TRUE(array->GetVTable() != NULL);
     EXPECT_EQ(2, array->GetIfTableCount());
     ASSERT_TRUE(array->GetIfTable() != NULL);
-    kh.ChangeClass(kh.GetDirectInterface(0));
-    EXPECT_STREQ(kh.GetDescriptor(), "Ljava/lang/Cloneable;");
-    kh.ChangeClass(array.get());
-    kh.ChangeClass(kh.GetDirectInterface(1));
-    EXPECT_STREQ(kh.GetDescriptor(), "Ljava/io/Serializable;");
-    EXPECT_EQ(class_linker_->FindArrayClass(self, array->GetComponentType()), array.get());
+    mirror::Class* direct_interface0 = mirror::Class::GetDirectInterface(self, array, 0);
+    EXPECT_TRUE(direct_interface0 != nullptr);
+    EXPECT_STREQ(direct_interface0->GetDescriptor().c_str(), "Ljava/lang/Cloneable;");
+    mirror::Class* direct_interface1 = mirror::Class::GetDirectInterface(self, array, 1);
+    EXPECT_STREQ(direct_interface1->GetDescriptor().c_str(), "Ljava/io/Serializable;");
+    EXPECT_EQ(class_linker_->FindArrayClass(self, array->GetComponentType()), array.Get());
   }
 
   void AssertMethod(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -182,10 +178,9 @@
     EXPECT_TRUE(fh.GetType() != NULL);
   }
 
-  void AssertClass(const std::string& descriptor, const SirtRef<mirror::Class>& klass)
+  void AssertClass(const std::string& descriptor, Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    ClassHelper kh(klass.get());
-    EXPECT_STREQ(descriptor.c_str(), kh.GetDescriptor());
+    EXPECT_STREQ(descriptor.c_str(), klass->GetDescriptor().c_str());
     if (descriptor == "Ljava/lang/Object;") {
       EXPECT_FALSE(klass->HasSuperClass());
     } else {
@@ -200,8 +195,9 @@
     EXPECT_FALSE(klass->IsErroneous());
     EXPECT_FALSE(klass->IsArrayClass());
     EXPECT_TRUE(klass->GetComponentType() == NULL);
-    EXPECT_TRUE(klass->IsInSamePackage(klass.get()));
-    EXPECT_TRUE(mirror::Class::IsInSamePackage(kh.GetDescriptor(), kh.GetDescriptor()));
+    EXPECT_TRUE(klass->IsInSamePackage(klass.Get()));
+    EXPECT_TRUE(mirror::Class::IsInSamePackage(klass->GetDescriptor().c_str(),
+                                               klass->GetDescriptor().c_str()));
     if (klass->IsInterface()) {
       EXPECT_TRUE(klass->IsAbstract());
       if (klass->NumDirectMethods() == 1) {
@@ -242,31 +238,31 @@
     }
 
     EXPECT_FALSE(klass->IsPrimitive());
-    EXPECT_TRUE(klass->CanAccess(klass.get()));
+    EXPECT_TRUE(klass->CanAccess(klass.Get()));
 
     for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
       mirror::ArtMethod* method = klass->GetDirectMethod(i);
       AssertMethod(method);
       EXPECT_TRUE(method->IsDirect());
-      EXPECT_EQ(klass.get(), method->GetDeclaringClass());
+      EXPECT_EQ(klass.Get(), method->GetDeclaringClass());
     }
 
     for (size_t i = 0; i < klass->NumVirtualMethods(); i++) {
       mirror::ArtMethod* method = klass->GetVirtualMethod(i);
       AssertMethod(method);
       EXPECT_FALSE(method->IsDirect());
-      EXPECT_TRUE(method->GetDeclaringClass()->IsAssignableFrom(klass.get()));
+      EXPECT_TRUE(method->GetDeclaringClass()->IsAssignableFrom(klass.Get()));
     }
 
     for (size_t i = 0; i < klass->NumInstanceFields(); i++) {
       mirror::ArtField* field = klass->GetInstanceField(i);
-      AssertField(klass.get(), field);
+      AssertField(klass.Get(), field);
       EXPECT_FALSE(field->IsStatic());
     }
 
     for (size_t i = 0; i < klass->NumStaticFields(); i++) {
       mirror::ArtField* field = klass->GetStaticField(i);
-      AssertField(klass.get(), field);
+      AssertField(klass.Get(), field);
       EXPECT_TRUE(field->IsStatic());
     }
 
@@ -294,7 +290,7 @@
     }
 
     size_t total_num_reference_instance_fields = 0;
-    mirror::Class* k = klass.get();
+    mirror::Class* k = klass.Get();
     while (k != NULL) {
       total_num_reference_instance_fields += k->NumReferenceInstanceFields();
       k = k->GetSuperClass();
@@ -306,12 +302,14 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     ASSERT_TRUE(descriptor != NULL);
     Thread* self = Thread::Current();
-    SirtRef<mirror::Class> klass(self, class_linker_->FindSystemClass(self, descriptor.c_str()));
-    ASSERT_TRUE(klass.get() != nullptr);
-    EXPECT_STREQ(descriptor.c_str(), ClassHelper(klass.get()).GetDescriptor());
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> klass(
+        hs.NewHandle(class_linker_->FindSystemClass(self, descriptor.c_str())));
+    ASSERT_TRUE(klass.Get() != nullptr);
+    EXPECT_STREQ(descriptor.c_str(), klass.Get()->GetDescriptor().c_str());
     EXPECT_EQ(class_loader, klass->GetClassLoader());
     if (klass->IsPrimitive()) {
-      AssertPrimitiveClass(descriptor, klass.get());
+      AssertPrimitiveClass(descriptor, klass.Get());
     } else if (klass->IsArrayClass()) {
       AssertArrayClass(descriptor, klass);
     } else {
@@ -491,9 +489,6 @@
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_code_item_offset_),           "dexCodeItemOffset"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_method_index_),               "dexMethodIndex"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, method_index_),                   "methodIndex"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, quick_core_spill_mask_),          "quickCoreSpillMask"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, quick_fp_spill_mask_),            "quickFpSpillMask"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, quick_frame_size_in_bytes_),      "quickFrameSizeInBytes"));
   };
 };
 
@@ -674,7 +669,9 @@
 
 TEST_F(ClassLinkerTest, FindClassNested) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("Nested")));
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(LoadDex("Nested"))));
 
   mirror::Class* outer = class_linker_->FindClass(soa.Self(), "LNested;", class_loader);
   ASSERT_TRUE(outer != NULL);
@@ -704,12 +701,11 @@
 TEST_F(ClassLinkerTest, FindClass) {
   ScopedObjectAccess soa(Thread::Current());
   mirror::Class* JavaLangObject = class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;");
-  ClassHelper kh(JavaLangObject);
   ASSERT_TRUE(JavaLangObject != NULL);
   ASSERT_TRUE(JavaLangObject->GetClass() != NULL);
   ASSERT_EQ(JavaLangObject->GetClass(), JavaLangObject->GetClass()->GetClass());
   EXPECT_EQ(JavaLangObject, JavaLangObject->GetClass()->GetSuperClass());
-  ASSERT_STREQ(kh.GetDescriptor(), "Ljava/lang/Object;");
+  ASSERT_STREQ(JavaLangObject->GetDescriptor().c_str(), "Ljava/lang/Object;");
   EXPECT_TRUE(JavaLangObject->GetSuperClass() == NULL);
   EXPECT_FALSE(JavaLangObject->HasSuperClass());
   EXPECT_TRUE(JavaLangObject->GetClassLoader() == NULL);
@@ -746,20 +742,21 @@
   }
 
   EXPECT_EQ(0U, JavaLangObject->NumStaticFields());
-  EXPECT_EQ(0U, kh.NumDirectInterfaces());
+  EXPECT_EQ(0U, JavaLangObject->NumDirectInterfaces());
 
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("MyClass")));
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(LoadDex("MyClass"))));
   AssertNonExistentClass("LMyClass;");
   mirror::Class* MyClass = class_linker_->FindClass(soa.Self(), "LMyClass;", class_loader);
-  kh.ChangeClass(MyClass);
   ASSERT_TRUE(MyClass != NULL);
   ASSERT_TRUE(MyClass->GetClass() != NULL);
   ASSERT_EQ(MyClass->GetClass(), MyClass->GetClass()->GetClass());
   EXPECT_EQ(JavaLangObject, MyClass->GetClass()->GetSuperClass());
-  ASSERT_STREQ(kh.GetDescriptor(), "LMyClass;");
+  ASSERT_STREQ(MyClass->GetDescriptor().c_str(), "LMyClass;");
   EXPECT_TRUE(MyClass->GetSuperClass() == JavaLangObject);
   EXPECT_TRUE(MyClass->HasSuperClass());
-  EXPECT_EQ(class_loader.get(), MyClass->GetClassLoader());
+  EXPECT_EQ(class_loader.Get(), MyClass->GetClassLoader());
   EXPECT_EQ(mirror::Class::kStatusResolved, MyClass->GetStatus());
   EXPECT_FALSE(MyClass->IsErroneous());
   EXPECT_TRUE(MyClass->IsLoaded());
@@ -778,7 +775,7 @@
   EXPECT_EQ(0U, MyClass->NumVirtualMethods());
   EXPECT_EQ(0U, MyClass->NumInstanceFields());
   EXPECT_EQ(0U, MyClass->NumStaticFields());
-  EXPECT_EQ(0U, kh.NumDirectInterfaces());
+  EXPECT_EQ(0U, MyClass->NumDirectInterfaces());
 
   EXPECT_EQ(JavaLangObject->GetClass()->GetClass(), MyClass->GetClass()->GetClass());
 
@@ -787,7 +784,7 @@
   AssertArrayClass("[Ljava/lang/Object;", "Ljava/lang/Object;", NULL);
   // synthesized on the fly
   AssertArrayClass("[[C", "[C", NULL);
-  AssertArrayClass("[[[LMyClass;", "[[LMyClass;", class_loader.get());
+  AssertArrayClass("[[[LMyClass;", "[[LMyClass;", class_loader.Get());
   // or not available at all
   AssertNonExistentClass("[[[[LNonExistentClass;");
 }
@@ -816,27 +813,28 @@
 
 TEST_F(ClassLinkerTest, ValidatePrimitiveArrayElementsOffset) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::LongArray> long_array(soa.Self(), mirror::LongArray::Alloc(soa.Self(), 0));
+  StackHandleScope<5> hs(soa.Self());
+  Handle<mirror::LongArray> long_array(hs.NewHandle(mirror::LongArray::Alloc(soa.Self(), 0)));
   EXPECT_EQ(class_linker_->FindSystemClass(soa.Self(), "[J"), long_array->GetClass());
   uintptr_t data_offset = reinterpret_cast<uintptr_t>(long_array->GetData());
   EXPECT_TRUE(IsAligned<8>(data_offset));  // Longs require 8 byte alignment
 
-  SirtRef<mirror::DoubleArray> double_array(soa.Self(), mirror::DoubleArray::Alloc(soa.Self(), 0));
+  Handle<mirror::DoubleArray> double_array(hs.NewHandle(mirror::DoubleArray::Alloc(soa.Self(), 0)));
   EXPECT_EQ(class_linker_->FindSystemClass(soa.Self(), "[D"), double_array->GetClass());
   data_offset = reinterpret_cast<uintptr_t>(double_array->GetData());
   EXPECT_TRUE(IsAligned<8>(data_offset));  // Doubles require 8 byte alignment
 
-  SirtRef<mirror::IntArray> int_array(soa.Self(), mirror::IntArray::Alloc(soa.Self(), 0));
+  Handle<mirror::IntArray> int_array(hs.NewHandle(mirror::IntArray::Alloc(soa.Self(), 0)));
   EXPECT_EQ(class_linker_->FindSystemClass(soa.Self(), "[I"), int_array->GetClass());
   data_offset = reinterpret_cast<uintptr_t>(int_array->GetData());
   EXPECT_TRUE(IsAligned<4>(data_offset));  // Ints require 4 byte alignment
 
-  SirtRef<mirror::CharArray> char_array(soa.Self(), mirror::CharArray::Alloc(soa.Self(), 0));
+  Handle<mirror::CharArray> char_array(hs.NewHandle(mirror::CharArray::Alloc(soa.Self(), 0)));
   EXPECT_EQ(class_linker_->FindSystemClass(soa.Self(), "[C"), char_array->GetClass());
   data_offset = reinterpret_cast<uintptr_t>(char_array->GetData());
   EXPECT_TRUE(IsAligned<2>(data_offset));  // Chars require 2 byte alignment
 
-  SirtRef<mirror::ShortArray> short_array(soa.Self(), mirror::ShortArray::Alloc(soa.Self(), 0));
+  Handle<mirror::ShortArray> short_array(hs.NewHandle(mirror::ShortArray::Alloc(soa.Self(), 0)));
   EXPECT_EQ(class_linker_->FindSystemClass(soa.Self(), "[S"), short_array->GetClass());
   data_offset = reinterpret_cast<uintptr_t>(short_array->GetData());
   EXPECT_TRUE(IsAligned<2>(data_offset));  // Shorts require 2 byte alignment
@@ -848,7 +846,7 @@
   // Validate that the "value" field is always the 0th field in each of java.lang's box classes.
   // This lets UnboxPrimitive avoid searching for the field by name at runtime.
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(), nullptr);
+  NullHandle<mirror::ClassLoader> class_loader;
   mirror::Class* c;
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Boolean;", class_loader);
   FieldHelper fh(c->GetIFields()->Get(0));
@@ -878,21 +876,25 @@
 
 TEST_F(ClassLinkerTest, TwoClassLoadersOneClass) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::ClassLoader> class_loader_1(soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("MyClass")));
-  SirtRef<mirror::ClassLoader> class_loader_2(soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("MyClass")));
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader_1(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(LoadDex("MyClass"))));
+  Handle<mirror::ClassLoader> class_loader_2(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(LoadDex("MyClass"))));
   mirror::Class* MyClass_1 = class_linker_->FindClass(soa.Self(), "LMyClass;", class_loader_1);
   mirror::Class* MyClass_2 = class_linker_->FindClass(soa.Self(), "LMyClass;", class_loader_2);
-  EXPECT_TRUE(MyClass_1 != NULL);
-  EXPECT_TRUE(MyClass_2 != NULL);
+  EXPECT_TRUE(MyClass_1 != nullptr);
+  EXPECT_TRUE(MyClass_2 != nullptr);
   EXPECT_NE(MyClass_1, MyClass_2);
 }
 
 TEST_F(ClassLinkerTest, StaticFields) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
-                                            soa.Decode<mirror::ClassLoader*>(LoadDex("Statics")));
-  SirtRef<mirror::Class> statics(soa.Self(), class_linker_->FindClass(soa.Self(), "LStatics;",
-                                                                      class_loader));
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(LoadDex("Statics"))));
+  Handle<mirror::Class> statics(
+      hs.NewHandle(class_linker_->FindClass(soa.Self(), "LStatics;", class_loader)));
   class_linker_->EnsureInitialized(statics, true, true);
 
   // Static final primitives that are initialized by a compile-time constant
@@ -903,88 +905,96 @@
 
   EXPECT_EQ(9U, statics->NumStaticFields());
 
-  mirror::ArtField* s0 = statics->FindStaticField("s0", "Z");
+  mirror::ArtField* s0 = mirror::Class::FindStaticField(soa.Self(), statics, "s0", "Z");
   FieldHelper fh(s0);
-  EXPECT_STREQ(ClassHelper(s0->GetClass()).GetDescriptor(), "Ljava/lang/reflect/ArtField;");
+  EXPECT_STREQ(s0->GetClass()->GetDescriptor().c_str(), "Ljava/lang/reflect/ArtField;");
   EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimBoolean);
-  EXPECT_EQ(true, s0->GetBoolean(statics.get()));
-  s0->SetBoolean<false>(statics.get(), false);
+  EXPECT_EQ(true, s0->GetBoolean(statics.Get()));
+  s0->SetBoolean<false>(statics.Get(), false);
 
-  mirror::ArtField* s1 = statics->FindStaticField("s1", "B");
+  mirror::ArtField* s1 = mirror::Class::FindStaticField(soa.Self(), statics, "s1", "B");
   fh.ChangeField(s1);
   EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimByte);
-  EXPECT_EQ(5, s1->GetByte(statics.get()));
-  s1->SetByte<false>(statics.get(), 6);
+  EXPECT_EQ(5, s1->GetByte(statics.Get()));
+  s1->SetByte<false>(statics.Get(), 6);
 
-  mirror::ArtField* s2 = statics->FindStaticField("s2", "C");
+  mirror::ArtField* s2 = mirror::Class::FindStaticField(soa.Self(), statics, "s2", "C");
   fh.ChangeField(s2);
   EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimChar);
-  EXPECT_EQ('a', s2->GetChar(statics.get()));
-  s2->SetChar<false>(statics.get(), 'b');
+  EXPECT_EQ('a', s2->GetChar(statics.Get()));
+  s2->SetChar<false>(statics.Get(), 'b');
 
-  mirror::ArtField* s3 = statics->FindStaticField("s3", "S");
+  mirror::ArtField* s3 = mirror::Class::FindStaticField(soa.Self(), statics, "s3", "S");
   fh.ChangeField(s3);
   EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimShort);
-  EXPECT_EQ(-536, s3->GetShort(statics.get()));
-  s3->SetShort<false>(statics.get(), -535);
+  EXPECT_EQ(-536, s3->GetShort(statics.Get()));
+  s3->SetShort<false>(statics.Get(), -535);
 
-  mirror::ArtField* s4 = statics->FindStaticField("s4", "I");
+  mirror::ArtField* s4 = mirror::Class::FindStaticField(soa.Self(), statics, "s4", "I");
   fh.ChangeField(s4);
   EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimInt);
-  EXPECT_EQ(2000000000, s4->GetInt(statics.get()));
-  s4->SetInt<false>(statics.get(), 2000000001);
+  EXPECT_EQ(2000000000, s4->GetInt(statics.Get()));
+  s4->SetInt<false>(statics.Get(), 2000000001);
 
-  mirror::ArtField* s5 = statics->FindStaticField("s5", "J");
+  mirror::ArtField* s5 = mirror::Class::FindStaticField(soa.Self(), statics, "s5", "J");
   fh.ChangeField(s5);
   EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimLong);
-  EXPECT_EQ(0x1234567890abcdefLL, s5->GetLong(statics.get()));
-  s5->SetLong<false>(statics.get(), INT64_C(0x34567890abcdef12));
+  EXPECT_EQ(0x1234567890abcdefLL, s5->GetLong(statics.Get()));
+  s5->SetLong<false>(statics.Get(), INT64_C(0x34567890abcdef12));
 
-  mirror::ArtField* s6 = statics->FindStaticField("s6", "F");
+  mirror::ArtField* s6 = mirror::Class::FindStaticField(soa.Self(), statics, "s6", "F");
   fh.ChangeField(s6);
   EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimFloat);
-  EXPECT_EQ(0.5, s6->GetFloat(statics.get()));
-  s6->SetFloat<false>(statics.get(), 0.75);
+  EXPECT_EQ(0.5, s6->GetFloat(statics.Get()));
+  s6->SetFloat<false>(statics.Get(), 0.75);
 
-  mirror::ArtField* s7 = statics->FindStaticField("s7", "D");
+  mirror::ArtField* s7 = mirror::Class::FindStaticField(soa.Self(), statics, "s7", "D");
   fh.ChangeField(s7);
   EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimDouble);
-  EXPECT_EQ(16777217, s7->GetDouble(statics.get()));
-  s7->SetDouble<false>(statics.get(), 16777219);
+  EXPECT_EQ(16777217, s7->GetDouble(statics.Get()));
+  s7->SetDouble<false>(statics.Get(), 16777219);
 
-  mirror::ArtField* s8 = statics->FindStaticField("s8", "Ljava/lang/String;");
+  mirror::ArtField* s8 = mirror::Class::FindStaticField(soa.Self(), statics, "s8",
+                                                        "Ljava/lang/String;");
   fh.ChangeField(s8);
   EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimNot);
-  EXPECT_TRUE(s8->GetObject(statics.get())->AsString()->Equals("android"));
+  EXPECT_TRUE(s8->GetObject(statics.Get())->AsString()->Equals("android"));
   s8->SetObject<false>(s8->GetDeclaringClass(),
                        mirror::String::AllocFromModifiedUtf8(soa.Self(), "robot"));
 
   // TODO: Remove EXPECT_FALSE when GCC can handle EXPECT_EQ
   // http://code.google.com/p/googletest/issues/detail?id=322
-  EXPECT_FALSE(s0->GetBoolean(statics.get()));
-  EXPECT_EQ(6, s1->GetByte(statics.get()));
-  EXPECT_EQ('b', s2->GetChar(statics.get()));
-  EXPECT_EQ(-535, s3->GetShort(statics.get()));
-  EXPECT_EQ(2000000001, s4->GetInt(statics.get()));
-  EXPECT_EQ(INT64_C(0x34567890abcdef12), s5->GetLong(statics.get()));
-  EXPECT_EQ(0.75, s6->GetFloat(statics.get()));
-  EXPECT_EQ(16777219, s7->GetDouble(statics.get()));
-  EXPECT_TRUE(s8->GetObject(statics.get())->AsString()->Equals("robot"));
+  EXPECT_FALSE(s0->GetBoolean(statics.Get()));
+  EXPECT_EQ(6, s1->GetByte(statics.Get()));
+  EXPECT_EQ('b', s2->GetChar(statics.Get()));
+  EXPECT_EQ(-535, s3->GetShort(statics.Get()));
+  EXPECT_EQ(2000000001, s4->GetInt(statics.Get()));
+  EXPECT_EQ(INT64_C(0x34567890abcdef12), s5->GetLong(statics.Get()));
+  EXPECT_EQ(0.75, s6->GetFloat(statics.Get()));
+  EXPECT_EQ(16777219, s7->GetDouble(statics.Get()));
+  EXPECT_TRUE(s8->GetObject(statics.Get())->AsString()->Equals("robot"));
 }
 
 TEST_F(ClassLinkerTest, Interfaces) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("Interfaces")));
-  mirror::Class* I = class_linker_->FindClass(soa.Self(), "LInterfaces$I;", class_loader);
-  mirror::Class* J = class_linker_->FindClass(soa.Self(), "LInterfaces$J;", class_loader);
-  mirror::Class* K = class_linker_->FindClass(soa.Self(), "LInterfaces$K;", class_loader);
-  mirror::Class* A = class_linker_->FindClass(soa.Self(), "LInterfaces$A;", class_loader);
-  mirror::Class* B = class_linker_->FindClass(soa.Self(), "LInterfaces$B;", class_loader);
-  EXPECT_TRUE(I->IsAssignableFrom(A));
-  EXPECT_TRUE(J->IsAssignableFrom(A));
-  EXPECT_TRUE(J->IsAssignableFrom(K));
-  EXPECT_TRUE(K->IsAssignableFrom(B));
-  EXPECT_TRUE(J->IsAssignableFrom(B));
+  StackHandleScope<6> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(LoadDex("Interfaces"))));
+  Handle<mirror::Class> I(
+      hs.NewHandle(class_linker_->FindClass(soa.Self(), "LInterfaces$I;", class_loader)));
+  Handle<mirror::Class> J(
+      hs.NewHandle(class_linker_->FindClass(soa.Self(), "LInterfaces$J;", class_loader)));
+  Handle<mirror::Class> K(
+      hs.NewHandle(class_linker_->FindClass(soa.Self(), "LInterfaces$K;", class_loader)));
+  Handle<mirror::Class> A(
+      hs.NewHandle(class_linker_->FindClass(soa.Self(), "LInterfaces$A;", class_loader)));
+  Handle<mirror::Class> B(
+      hs.NewHandle(class_linker_->FindClass(soa.Self(), "LInterfaces$B;", class_loader)));
+  EXPECT_TRUE(I->IsAssignableFrom(A.Get()));
+  EXPECT_TRUE(J->IsAssignableFrom(A.Get()));
+  EXPECT_TRUE(J->IsAssignableFrom(K.Get()));
+  EXPECT_TRUE(K->IsAssignableFrom(B.Get()));
+  EXPECT_TRUE(J->IsAssignableFrom(B.Get()));
 
   const Signature void_sig = I->GetDexCache()->GetDexFile()->CreateSignature("()V");
   mirror::ArtMethod* Ii = I->FindVirtualMethod("i", void_sig);
@@ -1017,10 +1027,14 @@
   EXPECT_EQ(Aj1, A->FindVirtualMethodForVirtualOrInterface(Jj1));
   EXPECT_EQ(Aj2, A->FindVirtualMethodForVirtualOrInterface(Jj2));
 
-  mirror::ArtField* Afoo = A->FindStaticField("foo", "Ljava/lang/String;");
-  mirror::ArtField* Bfoo = B->FindStaticField("foo", "Ljava/lang/String;");
-  mirror::ArtField* Jfoo = J->FindStaticField("foo", "Ljava/lang/String;");
-  mirror::ArtField* Kfoo = K->FindStaticField("foo", "Ljava/lang/String;");
+  mirror::ArtField* Afoo = mirror::Class::FindStaticField(soa.Self(), A, "foo",
+                                                          "Ljava/lang/String;");
+  mirror::ArtField* Bfoo = mirror::Class::FindStaticField(soa.Self(), B, "foo",
+                                                          "Ljava/lang/String;");
+  mirror::ArtField* Jfoo = mirror::Class::FindStaticField(soa.Self(), J, "foo",
+                                                          "Ljava/lang/String;");
+  mirror::ArtField* Kfoo = mirror::Class::FindStaticField(soa.Self(), K, "foo",
+                                                          "Ljava/lang/String;");
   ASSERT_TRUE(Afoo != NULL);
   EXPECT_EQ(Afoo, Bfoo);
   EXPECT_EQ(Afoo, Jfoo);
@@ -1035,7 +1049,9 @@
 
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("StaticsFromCode");
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(jclass_loader));
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
   const DexFile* dex_file = Runtime::Current()->GetCompileTimeClassPath(jclass_loader)[0];
   CHECK(dex_file != NULL);
   mirror::Class* klass = class_linker_->FindClass(soa.Self(), "LStaticsFromCode;", class_loader);
@@ -1092,9 +1108,8 @@
   ScopedObjectAccess soa(Thread::Current());
   for (int i = 0; i < ClassLinker::kClassRootsMax; i++) {
     mirror::Class* klass = class_linker_->GetClassRoot(ClassLinker::ClassRoot(i));
-    ClassHelper kh(klass);
-    EXPECT_TRUE(kh.GetDescriptor() != NULL);
-    EXPECT_STREQ(kh.GetDescriptor(),
+    EXPECT_TRUE(!klass->GetDescriptor().empty());
+    EXPECT_STREQ(klass->GetDescriptor().c_str(),
                  class_linker_->GetClassRootDescriptor(ClassLinker::ClassRoot(i))) << " i = " << i;
   }
 }
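
The test changes above are one mechanical migration: a SirtRef<T>(self, obj) local becomes a
Handle<T> obtained from a StackHandleScope<N> sized to the number of handles it hands out, and
.get()/.reset() become .Get()/.Assign(). A minimal sketch of the new pattern, using only calls
visible in the hunks above (FindSystemClass, GetDescriptor); the function itself is illustrative
and not part of the change:

// Sketch: SirtRef -> StackHandleScope/Handle rewrite as applied throughout class_linker_test.cc.
static void CheckSystemClass(Thread* self, ClassLinker* class_linker, const char* descriptor)
    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
  StackHandleScope<1> hs(self);                      // template argument = number of handles
  Handle<mirror::Class> klass(
      hs.NewHandle(class_linker->FindSystemClass(self, descriptor)));
  CHECK(klass.Get() != nullptr);                     // .get() is now .Get()
  CHECK_EQ(std::string(descriptor), klass->GetDescriptor());  // ClassHelper is no longer needed
}
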
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 723e32c..bac212a 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -24,6 +24,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <fstream>
+#include <memory>
 
 #include "../../external/icu4c/common/unicode/uvernum.h"
 #include "base/macros.h"
@@ -47,7 +48,6 @@
 #include "ScopedLocalRef.h"
 #include "thread.h"
 #include "utils.h"
-#include "UniquePtr.h"
 #include "verifier/method_verifier.h"
 #include "verifier/method_verifier-inl.h"
 #include "well_known_classes.h"
@@ -57,6 +57,9 @@
 class ScratchFile {
  public:
   ScratchFile() {
+    // ANDROID_DATA needs to be set
+    CHECK_NE(static_cast<char*>(nullptr), getenv("ANDROID_DATA")) <<
+        "Are you subclassing RuntimeTest?";
     filename_ = getenv("ANDROID_DATA");
     filename_ += "/TmpFile-XXXXXX";
     int fd = mkstemp(&filename_[0]);
@@ -72,9 +75,14 @@
     file_.reset(new File(fd, GetFilename()));
   }
 
+  explicit ScratchFile(File* file) {
+    CHECK(file != NULL);
+    filename_ = file->GetPath();
+    file_.reset(file);
+  }
+
   ~ScratchFile() {
-    int unlink_result = unlink(filename_.c_str());
-    CHECK_EQ(0, unlink_result);
+    Unlink();
   }
 
   const std::string& GetFilename() const {
@@ -89,9 +97,17 @@
     return file_->Fd();
   }
 
+  void Unlink() {
+    if (!OS::FileExists(filename_.c_str())) {
+      return;
+    }
+    int unlink_result = unlink(filename_.c_str());
+    CHECK_EQ(0, unlink_result);
+  }
+
  private:
   std::string filename_;
-  UniquePtr<File> file_;
+  std::unique_ptr<File> file_;
 };
 
 class CommonRuntimeTest : public testing::Test {
@@ -255,11 +271,7 @@
       filename += getenv("ANDROID_HOST_OUT");
       filename += "/framework/";
     } else {
-#ifdef __LP64__
-      filename += "/data/nativetest/art64/";
-#else
       filename += "/data/nativetest/art/";
-#endif
     }
     filename += "art-test-dex-";
     filename += name;
@@ -292,7 +304,7 @@
   std::string dalvik_cache_;
   const DexFile* java_lang_dex_file_;  // owned by runtime_
   std::vector<const DexFile*> boot_class_path_;
-  UniquePtr<Runtime> runtime_;
+  std::unique_ptr<Runtime> runtime_;
   // Owned by the runtime
   ClassLinker* class_linker_;
 
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index 315f274..a3e3cfa 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -36,8 +36,7 @@
 static void AddReferrerLocation(std::ostream& os, mirror::Class* referrer)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (referrer != NULL) {
-    ClassHelper kh(referrer);
-    std::string location(kh.GetLocation());
+    std::string location(referrer->GetLocation());
     if (!location.empty()) {
       os << " (declaration of '" << PrettyDescriptor(referrer)
             << "' appears in " << location << ")";
@@ -297,10 +296,9 @@
 void ThrowNoSuchFieldError(const StringPiece& scope, mirror::Class* c,
                            const StringPiece& type, const StringPiece& name)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ClassHelper kh(c);
   std::ostringstream msg;
   msg << "No " << scope << "field " << name << " of type " << type
-      << " in class " << kh.GetDescriptor() << " or its superclasses";
+      << " in class " << c->GetDescriptor() << " or its superclasses";
   ThrowException(NULL, "Ljava/lang/NoSuchFieldError;", c, msg.str().c_str());
 }
 
@@ -309,9 +307,8 @@
 void ThrowNoSuchMethodError(InvokeType type, mirror::Class* c, const StringPiece& name,
                             const Signature& signature) {
   std::ostringstream msg;
-  ClassHelper kh(c);
   msg << "No " << type << " method " << name << signature
-      << " in class " << kh.GetDescriptor() << " or its super classes";
+      << " in class " << c->GetDescriptor() << " or its super classes";
   ThrowException(NULL, "Ljava/lang/NoSuchMethodError;", c, msg.str().c_str());
 }
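
Both throw helpers now read the descriptor straight off mirror::Class. Since GetDescriptor()
returns a std::string by value, chaining .c_str() inside one full expression (as later hunks do)
is safe, but a pointer that must outlive the statement needs the string kept alive first. A short
sketch of that distinction, assuming only the GetDescriptor() usage shown in this change:

std::string descriptor = c->GetDescriptor();
const char* raw = descriptor.c_str();  // valid for as long as 'descriptor' stays in scope
// By contrast, storing c->GetDescriptor().c_str() into a long-lived pointer would dangle.
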
 
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 1efd2e0..984f287 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -28,6 +28,7 @@
 #include "gc/accounting/card_table-inl.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
+#include "handle_scope.h"
 #include "jdwp/object_registry.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
@@ -45,8 +46,7 @@
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
 #include "ScopedPrimitiveArray.h"
-#include "sirt_ref.h"
-#include "stack_indirect_reference_table.h"
+#include "handle_scope-inl.h"
 #include "thread_list.h"
 #include "throw_location.h"
 #include "utf.h"
@@ -139,7 +139,7 @@
       // TODO: post location events is a suspension point and native method entry stubs aren't.
       return;
     }
-    Dbg::PostLocationEvent(method, 0, this_object, Dbg::kMethodEntry, nullptr);
+    Dbg::UpdateDebugger(thread, this_object, method, 0, Dbg::kMethodEntry, nullptr);
   }
 
   void MethodExited(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
@@ -149,7 +149,7 @@
       // TODO: post location events is a suspension point and native method entry stubs aren't.
       return;
     }
-    Dbg::PostLocationEvent(method, dex_pc, this_object, Dbg::kMethodExit, &return_value);
+    Dbg::UpdateDebugger(thread, this_object, method, dex_pc, Dbg::kMethodExit, &return_value);
   }
 
   void MethodUnwind(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
@@ -163,7 +163,7 @@
   void DexPcMoved(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
                   uint32_t new_dex_pc)
       OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Dbg::UpdateDebugger(thread, this_object, method, new_dex_pc);
+    Dbg::UpdateDebugger(thread, this_object, method, new_dex_pc, 0, nullptr);
   }
 
   void FieldRead(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
@@ -228,6 +228,15 @@
 size_t Dbg::full_deoptimization_event_count_ = 0;
 size_t Dbg::delayed_full_undeoptimization_count_ = 0;
 
+// Instrumentation event reference counters.
+size_t Dbg::dex_pc_change_event_ref_count_ = 0;
+size_t Dbg::method_enter_event_ref_count_ = 0;
+size_t Dbg::method_exit_event_ref_count_ = 0;
+size_t Dbg::field_read_event_ref_count_ = 0;
+size_t Dbg::field_write_event_ref_count_ = 0;
+size_t Dbg::exception_catch_event_ref_count_ = 0;
+uint32_t Dbg::instrumentation_events_ = 0;
+
 // Breakpoints.
 static std::vector<Breakpoint> gBreakpoints GUARDED_BY(Locks::breakpoint_lock_);
 
@@ -595,17 +604,17 @@
 void Dbg::GcDidFinish() {
   if (gDdmHpifWhen != HPIF_WHEN_NEVER) {
     ScopedObjectAccess soa(Thread::Current());
-    LOG(DEBUG) << "Sending heap info to DDM";
+    VLOG(jdwp) << "Sending heap info to DDM";
     DdmSendHeapInfo(gDdmHpifWhen);
   }
   if (gDdmHpsgWhen != HPSG_WHEN_NEVER) {
     ScopedObjectAccess soa(Thread::Current());
-    LOG(DEBUG) << "Dumping heap to DDM";
+    VLOG(jdwp) << "Dumping heap to DDM";
     DdmSendHeapSegments(false);
   }
   if (gDdmNhsgWhen != HPSG_WHEN_NEVER) {
     ScopedObjectAccess soa(Thread::Current());
-    LOG(DEBUG) << "Dumping native heap to DDM";
+    VLOG(jdwp) << "Dumping native heap to DDM";
     DdmSendHeapSegments(true);
   }
 }
@@ -641,14 +650,6 @@
   return gDisposed;
 }
 
-// All the instrumentation events the debugger is registered for.
-static constexpr uint32_t kListenerEvents = instrumentation::Instrumentation::kMethodEntered |
-                                            instrumentation::Instrumentation::kMethodExited |
-                                            instrumentation::Instrumentation::kDexPcMoved |
-                                            instrumentation::Instrumentation::kFieldRead |
-                                            instrumentation::Instrumentation::kFieldWritten |
-                                            instrumentation::Instrumentation::kExceptionCaught;
-
 void Dbg::GoActive() {
   // Enable all debugging features, including scans for breakpoints.
   // This is a no-op if we're already active.
@@ -668,6 +669,12 @@
     CHECK_EQ(deoptimization_requests_.size(), 0U);
     CHECK_EQ(full_deoptimization_event_count_, 0U);
     CHECK_EQ(delayed_full_undeoptimization_count_, 0U);
+    CHECK_EQ(dex_pc_change_event_ref_count_, 0U);
+    CHECK_EQ(method_enter_event_ref_count_, 0U);
+    CHECK_EQ(method_exit_event_ref_count_, 0U);
+    CHECK_EQ(field_read_event_ref_count_, 0U);
+    CHECK_EQ(field_write_event_ref_count_, 0U);
+    CHECK_EQ(exception_catch_event_ref_count_, 0U);
   }
 
   Runtime* runtime = Runtime::Current();
@@ -676,7 +683,7 @@
   ThreadState old_state = self->SetStateUnsafe(kRunnable);
   CHECK_NE(old_state, kRunnable);
   runtime->GetInstrumentation()->EnableDeoptimization();
-  runtime->GetInstrumentation()->AddListener(&gDebugInstrumentationListener, kListenerEvents);
+  instrumentation_events_ = 0;
   gDebuggerActive = true;
   CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
   runtime->GetThreadList()->ResumeAll();
@@ -708,7 +715,11 @@
       full_deoptimization_event_count_ = 0U;
       delayed_full_undeoptimization_count_ = 0U;
     }
-    runtime->GetInstrumentation()->RemoveListener(&gDebugInstrumentationListener, kListenerEvents);
+    if (instrumentation_events_ != 0) {
+      runtime->GetInstrumentation()->RemoveListener(&gDebugInstrumentationListener,
+                                                    instrumentation_events_);
+      instrumentation_events_ = 0;
+    }
     runtime->GetInstrumentation()->DisableDeoptimization();
     gDebuggerActive = false;
   }
@@ -745,7 +756,7 @@
   if (!o->IsClass()) {
     return StringPrintf("non-class %p", o);  // This is only used for debugging output anyway.
   }
-  return DescriptorToName(ClassHelper(o->AsClass()).GetDescriptor());
+  return DescriptorToName(o->AsClass()->GetDescriptor().c_str());
 }
 
 JDWP::JdwpError Dbg::GetClassObject(JDWP::RefTypeId id, JDWP::ObjectId& class_object_id) {
@@ -873,7 +884,7 @@
     std::vector<mirror::Object*> monitors;
     std::vector<uint32_t> stack_depths;
   };
-  UniquePtr<Context> context(Context::Create());
+  std::unique_ptr<Context> context(Context::Create());
   OwnedMonitorVisitor visitor(thread, context.get());
   visitor.WalkStack();
 
@@ -1077,7 +1088,7 @@
   }
 
   if (pDescriptor != NULL) {
-    *pDescriptor = ClassHelper(c).GetDescriptor();
+    *pDescriptor = c->GetDescriptor();
   }
   return JDWP::ERR_NONE;
 }
@@ -1113,7 +1124,7 @@
   if (c == NULL) {
     return status;
   }
-  *signature = ClassHelper(c).GetDescriptor();
+  *signature = c->GetDescriptor();
   return JDWP::ERR_NONE;
 }
 
@@ -1126,7 +1137,7 @@
   if (c->IsProxyClass()) {
     return JDWP::ERR_ABSENT_INFORMATION;
   }
-  result = ClassHelper(c).GetSourceFile();
+  result = c->GetSourceFile();
   return JDWP::ERR_NONE;
 }
 
@@ -1191,7 +1202,7 @@
     LOG(WARNING) << __FUNCTION__ << " access out of bounds: offset=" << offset << "; count=" << count;
     return JDWP::ERR_INVALID_LENGTH;
   }
-  std::string descriptor(ClassHelper(a->GetClass()).GetDescriptor());
+  std::string descriptor(a->GetClass()->GetDescriptor());
   JDWP::JdwpTag tag = BasicTagFromDescriptor(descriptor.c_str() + 1);
 
   expandBufAdd1(pReply, tag);
@@ -1253,9 +1264,8 @@
     LOG(WARNING) << __FUNCTION__ << " access out of bounds: offset=" << offset << "; count=" << count;
     return JDWP::ERR_INVALID_LENGTH;
   }
-  ClassHelper ch(dst->GetClass());
-  const char* descriptor = ch.GetDescriptor();
-  JDWP::JdwpTag tag = BasicTagFromDescriptor(descriptor + 1);
+  std::string descriptor = dst->GetClass()->GetDescriptor();
+  JDWP::JdwpTag tag = BasicTagFromDescriptor(descriptor.c_str() + 1);
 
   if (IsPrimitiveTag(tag)) {
     size_t width = GetTagWidth(tag);
@@ -1486,16 +1496,17 @@
 
 JDWP::JdwpError Dbg::OutputDeclaredInterfaces(JDWP::RefTypeId class_id, JDWP::ExpandBuf* pReply) {
   JDWP::JdwpError status;
-  mirror::Class* c = DecodeClass(class_id, status);
-  if (c == NULL) {
+  Thread* self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Class> c(hs.NewHandle(DecodeClass(class_id, status)));
+  if (c.Get() == nullptr) {
     return status;
   }
-
-  ClassHelper kh(c);
-  size_t interface_count = kh.NumDirectInterfaces();
+  size_t interface_count = c->NumDirectInterfaces();
   expandBufAdd4BE(pReply, interface_count);
   for (size_t i = 0; i < interface_count; ++i) {
-    expandBufAddRefTypeId(pReply, gRegistry->AddRefType(kh.GetDirectInterface(i)));
+    expandBufAddRefTypeId(pReply,
+                          gRegistry->AddRefType(mirror::Class::GetDirectInterface(self, c, i)));
   }
   return JDWP::ERR_NONE;
 }
@@ -2236,7 +2247,7 @@
       return JDWP::ERR_THREAD_NOT_SUSPENDED;
     }
   }
-  UniquePtr<Context> context(Context::Create());
+  std::unique_ptr<Context> context(Context::Create());
   GetThisVisitor visitor(thread, context.get(), frame_id);
   visitor.WalkStack();
   *result = gRegistry->Add(visitor.this_object);
@@ -2384,7 +2395,7 @@
     return error;
   }
   // TODO check thread is suspended by the debugger ?
-  UniquePtr<Context> context(Context::Create());
+  std::unique_ptr<Context> context(Context::Create());
   GetLocalVisitor visitor(soa, thread, context.get(), frame_id, slot, tag, buf, width);
   visitor.WalkStack();
   return visitor.error_;
@@ -2481,7 +2492,7 @@
     return error;
   }
   // TODO check thread is suspended by the debugger ?
-  UniquePtr<Context> context(Context::Create());
+  std::unique_ptr<Context> context(Context::Create());
   SetLocalVisitor visitor(thread, context.get(), frame_id, slot, tag, value, width);
   visitor.WalkStack();
   return visitor.error_;
@@ -2581,18 +2592,16 @@
   // since the class may not yet be verified.
   int state = JDWP::CS_VERIFIED | JDWP::CS_PREPARED;
   JDWP::JdwpTypeTag tag = GetTypeTag(c);
-  gJdwpState->PostClassPrepare(tag, gRegistry->Add(c),
-                               ClassHelper(c).GetDescriptor(), state);
+  gJdwpState->PostClassPrepare(tag, gRegistry->Add(c), c->GetDescriptor(), state);
 }
 
 void Dbg::UpdateDebugger(Thread* thread, mirror::Object* this_object,
-                         mirror::ArtMethod* m, uint32_t dex_pc) {
+                         mirror::ArtMethod* m, uint32_t dex_pc,
+                         int event_flags, const JValue* return_value) {
   if (!IsDebuggerActive() || dex_pc == static_cast<uint32_t>(-2) /* fake method exit */) {
     return;
   }
 
-  int event_flags = 0;
-
   if (IsBreakpoint(m, dex_pc)) {
     event_flags |= kBreakpoint;
   }
@@ -2660,7 +2669,26 @@
   // If there's something interesting going on, see if it matches one
   // of the debugger filters.
   if (event_flags != 0) {
-    Dbg::PostLocationEvent(m, dex_pc, this_object, event_flags, nullptr);
+    Dbg::PostLocationEvent(m, dex_pc, this_object, event_flags, return_value);
+  }
+}
+
+size_t* Dbg::GetReferenceCounterForEvent(uint32_t instrumentation_event) {
+  switch (instrumentation_event) {
+    case instrumentation::Instrumentation::kMethodEntered:
+      return &method_enter_event_ref_count_;
+    case instrumentation::Instrumentation::kMethodExited:
+      return &method_exit_event_ref_count_;
+    case instrumentation::Instrumentation::kDexPcMoved:
+      return &dex_pc_change_event_ref_count_;
+    case instrumentation::Instrumentation::kFieldRead:
+      return &field_read_event_ref_count_;
+    case instrumentation::Instrumentation::kFieldWritten:
+      return &field_write_event_ref_count_;
+    case instrumentation::Instrumentation::kExceptionCaught:
+      return &exception_catch_event_ref_count_;
+    default:
+      return nullptr;
   }
 }
 
@@ -2671,6 +2699,19 @@
     case DeoptimizationRequest::kNothing:
       LOG(WARNING) << "Ignoring empty deoptimization request.";
       break;
+    case DeoptimizationRequest::kRegisterForEvent:
+      VLOG(jdwp) << StringPrintf("Add debugger as listener for instrumentation event 0x%x",
+                                 request.instrumentation_event);
+      instrumentation->AddListener(&gDebugInstrumentationListener, request.instrumentation_event);
+      instrumentation_events_ |= request.instrumentation_event;
+      break;
+    case DeoptimizationRequest::kUnregisterForEvent:
+      VLOG(jdwp) << StringPrintf("Remove debugger as listener for instrumentation event 0x%x",
+                                 request.instrumentation_event);
+      instrumentation->RemoveListener(&gDebugInstrumentationListener,
+                                      request.instrumentation_event);
+      instrumentation_events_ &= ~request.instrumentation_event;
+      break;
     case DeoptimizationRequest::kFullDeoptimization:
       VLOG(jdwp) << "Deoptimize the world ...";
       instrumentation->DeoptimizeEverything();
@@ -2729,6 +2770,32 @@
 
 void Dbg::RequestDeoptimizationLocked(const DeoptimizationRequest& req) {
   switch (req.kind) {
+    case DeoptimizationRequest::kRegisterForEvent: {
+      DCHECK_NE(req.instrumentation_event, 0u);
+      size_t* counter = GetReferenceCounterForEvent(req.instrumentation_event);
+      CHECK(counter != nullptr) << StringPrintf("No counter for instrumentation event 0x%x",
+                                                req.instrumentation_event);
+      if (*counter == 0) {
+        VLOG(jdwp) << StringPrintf("Queue request #%zd to start listening to instrumentation event 0x%x",
+                                   deoptimization_requests_.size(), req.instrumentation_event);
+        deoptimization_requests_.push_back(req);
+      }
+      *counter = *counter + 1;
+      break;
+    }
+    case DeoptimizationRequest::kUnregisterForEvent: {
+      DCHECK_NE(req.instrumentation_event, 0u);
+      size_t* counter = GetReferenceCounterForEvent(req.instrumentation_event);
+      CHECK(counter != nullptr) << StringPrintf("No counter for instrumentation event 0x%x",
+                                                req.instrumentation_event);
+      *counter = *counter - 1;
+      if (*counter == 0) {
+        VLOG(jdwp) << StringPrintf("Queue request #%zd to stop listening to instrumentation event 0x%x",
+                                   deoptimization_requests_.size(), req.instrumentation_event);
+        deoptimization_requests_.push_back(req);
+      }
+      break;
+    }
     case DeoptimizationRequest::kFullDeoptimization: {
       DCHECK(req.method == nullptr);
       if (full_deoptimization_event_count_ == 0) {
@@ -2809,11 +2876,12 @@
     // should never be null. We could just check we never encounter this case.
     return false;
   }
-  SirtRef<mirror::DexCache> dex_cache(self, mh.GetDexCache());
-  SirtRef<mirror::ClassLoader> class_loader(self, mh.GetClassLoader());
+  StackHandleScope<2> hs(self);
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(mh.GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(mh.GetClassLoader()));
   verifier::MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader,
                                     &mh.GetClassDef(), code_item, m->GetDexMethodIndex(), m,
-                                    m->GetAccessFlags(), false, true);
+                                    m->GetAccessFlags(), false, true, false);
   // Note: we don't need to verify the method.
   return InlineMethodAnalyser::AnalyseMethodCode(&verifier, nullptr);
 }
@@ -3341,43 +3409,44 @@
 
   // We can be called while an exception is pending. We need
   // to preserve that across the method invocation.
-  SirtRef<mirror::Object> old_throw_this_object(soa.Self(), NULL);
-  SirtRef<mirror::ArtMethod> old_throw_method(soa.Self(), NULL);
-  SirtRef<mirror::Throwable> old_exception(soa.Self(), NULL);
+  StackHandleScope<4> hs(soa.Self());
+  auto old_throw_this_object = hs.NewHandle<mirror::Object>(nullptr);
+  auto old_throw_method = hs.NewHandle<mirror::ArtMethod>(nullptr);
+  auto old_exception = hs.NewHandle<mirror::Throwable>(nullptr);
   uint32_t old_throw_dex_pc;
   {
     ThrowLocation old_throw_location;
     mirror::Throwable* old_exception_obj = soa.Self()->GetException(&old_throw_location);
-    old_throw_this_object.reset(old_throw_location.GetThis());
-    old_throw_method.reset(old_throw_location.GetMethod());
-    old_exception.reset(old_exception_obj);
+    old_throw_this_object.Assign(old_throw_location.GetThis());
+    old_throw_method.Assign(old_throw_location.GetMethod());
+    old_exception.Assign(old_exception_obj);
     old_throw_dex_pc = old_throw_location.GetDexPc();
     soa.Self()->ClearException();
   }
 
   // Translate the method through the vtable, unless the debugger wants to suppress it.
-  SirtRef<mirror::ArtMethod> m(soa.Self(), pReq->method);
+  Handle<mirror::ArtMethod> m(hs.NewHandle(pReq->method));
   if ((pReq->options & JDWP::INVOKE_NONVIRTUAL) == 0 && pReq->receiver != NULL) {
-    mirror::ArtMethod* actual_method = pReq->klass->FindVirtualMethodForVirtualOrInterface(m.get());
-    if (actual_method != m.get()) {
-      VLOG(jdwp) << "ExecuteMethod translated " << PrettyMethod(m.get()) << " to " << PrettyMethod(actual_method);
-      m.reset(actual_method);
+    mirror::ArtMethod* actual_method = pReq->klass->FindVirtualMethodForVirtualOrInterface(m.Get());
+    if (actual_method != m.Get()) {
+      VLOG(jdwp) << "ExecuteMethod translated " << PrettyMethod(m.Get()) << " to " << PrettyMethod(actual_method);
+      m.Assign(actual_method);
     }
   }
-  VLOG(jdwp) << "ExecuteMethod " << PrettyMethod(m.get())
+  VLOG(jdwp) << "ExecuteMethod " << PrettyMethod(m.Get())
              << " receiver=" << pReq->receiver
              << " arg_count=" << pReq->arg_count;
-  CHECK(m.get() != nullptr);
+  CHECK(m.Get() != nullptr);
 
   CHECK_EQ(sizeof(jvalue), sizeof(uint64_t));
 
-  pReq->result_value = InvokeWithJValues(soa, pReq->receiver, soa.EncodeMethod(m.get()),
+  pReq->result_value = InvokeWithJValues(soa, pReq->receiver, soa.EncodeMethod(m.Get()),
                                          reinterpret_cast<jvalue*>(pReq->arg_values));
 
   mirror::Throwable* exception = soa.Self()->GetException(NULL);
   soa.Self()->ClearException();
   pReq->exception = gRegistry->Add(exception);
-  pReq->result_tag = BasicTagFromDescriptor(MethodHelper(m.get()).GetShorty());
+  pReq->result_tag = BasicTagFromDescriptor(MethodHelper(m.Get()).GetShorty());
   if (pReq->exception != 0) {
     VLOG(jdwp) << "  JDWP invocation returning with exception=" << exception
         << " " << exception->Dump();
@@ -3402,10 +3471,10 @@
     gRegistry->Add(pReq->result_value.GetL());
   }
 
-  if (old_exception.get() != NULL) {
-    ThrowLocation gc_safe_throw_location(old_throw_this_object.get(), old_throw_method.get(),
+  if (old_exception.Get() != NULL) {
+    ThrowLocation gc_safe_throw_location(old_throw_this_object.Get(), old_throw_method.Get(),
                                          old_throw_dex_pc);
-    soa.Self()->SetException(gc_safe_throw_location, old_exception.get());
+    soa.Self()->SetException(gc_safe_throw_location, old_exception.Get());
   }
 }
 
@@ -3547,9 +3616,10 @@
   } else {
     CHECK(type == CHUNK_TYPE("THCR") || type == CHUNK_TYPE("THNM")) << type;
     ScopedObjectAccessUnchecked soa(Thread::Current());
-    SirtRef<mirror::String> name(soa.Self(), t->GetThreadName(soa));
-    size_t char_count = (name.get() != NULL) ? name->GetLength() : 0;
-    const jchar* chars = (name.get() != NULL) ? name->GetCharArray()->GetData() : NULL;
+    StackHandleScope<1> hs(soa.Self());
+    Handle<mirror::String> name(hs.NewHandle(t->GetThreadName(soa)));
+    size_t char_count = (name.Get() != NULL) ? name->GetLength() : 0;
+    const jchar* chars = (name.Get() != NULL) ? name->GetCharArray()->GetData() : NULL;
 
     std::vector<uint8_t> bytes;
     JDWP::Append4BE(bytes, t->GetThreadId());
@@ -3969,7 +4039,11 @@
   // Send a series of heap segment chunks.
   HeapChunkContext context((what == HPSG_WHAT_MERGED_OBJECTS), native);
   if (native) {
+#ifdef USE_DLMALLOC
     dlmalloc_inspect_all(HeapChunkContext::HeapChunkCallback, &context);
+#else
+    UNIMPLEMENTED(WARNING) << "Native heap inspection is only supported with dlmalloc";
+#endif
   } else {
     gc::Heap* heap = Runtime::Current()->GetHeap();
     const std::vector<gc::space::ContinuousSpace*>& spaces = heap->GetContinuousSpaces();
@@ -4205,7 +4279,7 @@
     for (const std::string& str : table_) {
       const char* s = str.c_str();
       size_t s_len = CountModifiedUtf8Chars(s);
-      UniquePtr<uint16_t> s_utf16(new uint16_t[s_len]);
+      std::unique_ptr<uint16_t> s_utf16(new uint16_t[s_len]);
       ConvertModifiedUtf8ToUtf16(s_utf16.get(), s);
       JDWP::AppendUtf16BE(bytes, s_utf16.get(), s_len);
     }
@@ -4286,7 +4360,7 @@
     while (count--) {
       AllocRecord* record = &recent_allocation_records_[idx];
 
-      class_names.Add(ClassHelper(record->type).GetDescriptor());
+      class_names.Add(record->type->GetDescriptor().c_str());
 
       MethodHelper mh;
       for (size_t i = 0; i < kMaxAllocRecordStackDepth; i++) {
@@ -4340,8 +4414,8 @@
       // (1b) stack depth
       AllocRecord* record = &recent_allocation_records_[idx];
       size_t stack_depth = record->GetDepth();
-      ClassHelper kh(record->type);
-      size_t allocated_object_class_name_index = class_names.IndexOf(kh.GetDescriptor());
+      size_t allocated_object_class_name_index =
+          class_names.IndexOf(record->type->GetDescriptor().c_str());
       JDWP::Append4BE(bytes, record->byte_count);
       JDWP::Append2BE(bytes, record->thin_lock_id);
       JDWP::Append2BE(bytes, allocated_object_class_name_index);
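
The core of the debugger change is per-event reference counting: a kRegisterForEvent request is
queued only when an event's counter goes from 0 to 1, and a kUnregisterForEvent request only when
it drops back to 0, so the instrumentation listener is added and removed exactly once however many
JDWP event requests share the same instrumentation event. A standalone sketch of that counting
discipline (names shortened, locking omitted, nothing assumed beyond the hunks above):

#include <cstddef>
#include <cstdint>
#include <vector>

struct Request { bool register_for_event; uint32_t event; };

// Queue a listener (un)registration only on the 0 <-> 1 transition of the counter.
void RequestEvent(std::vector<Request>* queue, size_t* counter, uint32_t event, bool enable) {
  if (enable) {
    if (*counter == 0) {
      queue->push_back({true, event});   // first requester: add the listener later
    }
    ++*counter;
  } else {
    --*counter;
    if (*counter == 0) {
      queue->push_back({false, event});  // last requester gone: remove the listener later
    }
  }
}
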
diff --git a/runtime/debugger.h b/runtime/debugger.h
index bef708c..31ffd6e 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -129,21 +129,31 @@
   DISALLOW_COPY_AND_ASSIGN(SingleStepControl);
 };
 
+// TODO rename to InstrumentationRequest.
 struct DeoptimizationRequest {
   enum Kind {
     kNothing,                   // no action.
+    kRegisterForEvent,          // start listening for instrumentation event.
+    kUnregisterForEvent,        // stop listening for instrumentation event.
     kFullDeoptimization,        // deoptimize everything.
     kFullUndeoptimization,      // undeoptimize everything.
     kSelectiveDeoptimization,   // deoptimize one method.
     kSelectiveUndeoptimization  // undeoptimize one method.
   };
 
-  DeoptimizationRequest() : kind(kNothing), method(nullptr) {}
+  DeoptimizationRequest() : kind(kNothing), instrumentation_event(0), method(nullptr) {}
 
   void VisitRoots(RootCallback* callback, void* arg);
 
   Kind kind;
 
+  // TODO: we could use a union to hold instrumentation_event and method, since they only make
+  // sense for kRegisterForEvent/kUnregisterForEvent and
+  // kSelectiveDeoptimization/kSelectiveUndeoptimization respectively.
+
+  // Event to start or stop listening to. Only for kRegisterForEvent and kUnregisterForEvent.
+  uint32_t instrumentation_event;
+
   // Method for selective deoptimization.
   mirror::ArtMethod* method;
 };
@@ -417,10 +427,6 @@
     kMethodEntry    = 0x04,
     kMethodExit     = 0x08,
   };
-  static void PostLocationEvent(mirror::ArtMethod* method, int pcOffset,
-                                mirror::Object* thisPtr, int eventFlags,
-                                const JValue* return_value)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void PostFieldAccessEvent(mirror::ArtMethod* m, int dex_pc, mirror::Object* this_object,
                                    mirror::ArtField* f)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -439,7 +445,8 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void UpdateDebugger(Thread* thread, mirror::Object* this_object,
-                             mirror::ArtMethod* method, uint32_t new_dex_pc)
+                             mirror::ArtMethod* method, uint32_t new_dex_pc,
+                             int event_flags, const JValue* return_value)
       LOCKS_EXCLUDED(Locks::breakpoint_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -561,6 +568,11 @@
   static void PostThreadStartOrStop(Thread*, uint32_t)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  static void PostLocationEvent(mirror::ArtMethod* method, int pcOffset,
+                                mirror::Object* thisPtr, int eventFlags,
+                                const JValue* return_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static JDWP::ObjectId GetThisObjectIdForEvent(mirror::Object* this_object)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -579,11 +591,13 @@
   static size_t alloc_record_count_ GUARDED_BY(alloc_tracker_lock_);
 
   // Guards deoptimization requests.
+  // TODO rename to instrumentation_update_lock.
   static Mutex* deoptimization_lock_ ACQUIRED_AFTER(Locks::breakpoint_lock_);
 
   // Deoptimization requests to be processed each time the event list is updated. This is used when
   // registering and unregistering events so we do not deoptimize while holding the event list
   // lock.
+  // TODO rename to instrumentation_requests.
   static std::vector<DeoptimizationRequest> deoptimization_requests_ GUARDED_BY(deoptimization_lock_);
 
   // Count the number of events requiring full deoptimization. When the counter is > 0, everything
@@ -596,6 +610,19 @@
   // session.
   static size_t delayed_full_undeoptimization_count_ GUARDED_BY(deoptimization_lock_);
 
+  static size_t* GetReferenceCounterForEvent(uint32_t instrumentation_event);
+
+  // Instrumentation event reference counters.
+  // TODO: we could use an array instead of all these dedicated counters. Instrumentation
+  // events are bits of a mask, so we could convert them to an array index.
+  static size_t dex_pc_change_event_ref_count_ GUARDED_BY(deoptimization_lock_);
+  static size_t method_enter_event_ref_count_ GUARDED_BY(deoptimization_lock_);
+  static size_t method_exit_event_ref_count_ GUARDED_BY(deoptimization_lock_);
+  static size_t field_read_event_ref_count_ GUARDED_BY(deoptimization_lock_);
+  static size_t field_write_event_ref_count_ GUARDED_BY(deoptimization_lock_);
+  static size_t exception_catch_event_ref_count_ GUARDED_BY(deoptimization_lock_);
+  static uint32_t instrumentation_events_ GUARDED_BY(Locks::mutator_lock_);
+
   DISALLOW_COPY_AND_ASSIGN(Dbg);
 };
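
With the two new kinds, DeoptimizationRequest doubles as an instrumentation request: the event
mask lives in instrumentation_event for kRegisterForEvent/kUnregisterForEvent, while the selective
kinds keep using method. An illustrative helper for filling one in (field names are from the
header above; the helper itself is not part of the change):

DeoptimizationRequest MakeRegisterRequest(uint32_t instrumentation_event) {
  DeoptimizationRequest req;                            // kind defaults to kNothing
  req.kind = DeoptimizationRequest::kRegisterForEvent;
  req.instrumentation_event = instrumentation_event;    // only meaningful for (un)register kinds
  req.method = nullptr;                                 // only meaningful for selective kinds
  return req;
}
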
 
diff --git a/runtime/deoptimize_stack_visitor.cc b/runtime/deoptimize_stack_visitor.cc
index 3eb1792..449ccce 100644
--- a/runtime/deoptimize_stack_visitor.cc
+++ b/runtime/deoptimize_stack_visitor.cc
@@ -19,7 +19,7 @@
 #include "mirror/art_method-inl.h"
 #include "object_utils.h"
 #include "quick_exception_handler.h"
-#include "sirt_ref-inl.h"
+#include "handle_scope-inl.h"
 #include "verifier/method_verifier.h"
 
 namespace art {
@@ -50,11 +50,12 @@
   const Instruction* inst = Instruction::At(code_item->insns_ + dex_pc);
   uint32_t new_dex_pc = dex_pc + inst->SizeInCodeUnits();
   ShadowFrame* new_frame = ShadowFrame::Create(num_regs, nullptr, m, new_dex_pc);
-  SirtRef<mirror::DexCache> dex_cache(self_, mh.GetDexCache());
-  SirtRef<mirror::ClassLoader> class_loader(self_, mh.GetClassLoader());
+  StackHandleScope<2> hs(self_);
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(mh.GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(mh.GetClassLoader()));
   verifier::MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader,
                                     &mh.GetClassDef(), code_item, m->GetDexMethodIndex(), m,
-                                    m->GetAccessFlags(), false, true);
+                                    m->GetAccessFlags(), false, true, true);
   verifier.Verify();
   std::vector<int32_t> kinds = verifier.DescribeVRegs(dex_pc);
   for (uint16_t reg = 0; reg < num_regs; ++reg) {
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 6adfc1f..43ae308 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -23,6 +23,7 @@
 #include <string.h>
 #include <sys/file.h>
 #include <sys/stat.h>
+#include <memory>
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
@@ -37,9 +38,8 @@
 #include "os.h"
 #include "safe_map.h"
 #include "ScopedFd.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 #include "thread.h"
-#include "UniquePtr.h"
 #include "utf-inl.h"
 #include "utils.h"
 #include "well_known_classes.h"
@@ -93,12 +93,12 @@
     return false;
   }
   if (IsZipMagic(magic)) {
-    UniquePtr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd.release(), filename, error_msg));
+    std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd.release(), filename, error_msg));
     if (zip_archive.get() == NULL) {
       *error_msg = StringPrintf("Failed to open zip archive '%s'", filename);
       return false;
     }
-    UniquePtr<ZipEntry> zip_entry(zip_archive->Find(kClassesDex, error_msg));
+    std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(kClassesDex, error_msg));
     if (zip_entry.get() == NULL) {
       *error_msg = StringPrintf("Zip archive '%s' doesn't contain %s (error msg: %s)", filename,
                                 kClassesDex, error_msg->c_str());
@@ -108,7 +108,7 @@
     return true;
   }
   if (IsDexMagic(magic)) {
-    UniquePtr<const DexFile> dex_file(DexFile::OpenFile(fd.release(), filename, false, error_msg));
+    std::unique_ptr<const DexFile> dex_file(DexFile::OpenFile(fd.release(), filename, false, error_msg));
     if (dex_file.get() == NULL) {
       return false;
     }
@@ -171,7 +171,7 @@
 const DexFile* DexFile::OpenFile(int fd, const char* location, bool verify,
                                  std::string* error_msg) {
   CHECK(location != nullptr);
-  UniquePtr<MemMap> map;
+  std::unique_ptr<MemMap> map;
   {
     ScopedFd delayed_close(fd);
     struct stat sbuf;
@@ -218,7 +218,7 @@
 const char* DexFile::kClassesDex = "classes.dex";
 
 const DexFile* DexFile::OpenZip(int fd, const std::string& location, std::string* error_msg) {
-  UniquePtr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd, location.c_str(), error_msg));
+  std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd, location.c_str(), error_msg));
   if (zip_archive.get() == nullptr) {
     DCHECK(!error_msg->empty());
     return nullptr;
@@ -241,17 +241,17 @@
 const DexFile* DexFile::Open(const ZipArchive& zip_archive, const std::string& location,
                              std::string* error_msg) {
   CHECK(!location.empty());
-  UniquePtr<ZipEntry> zip_entry(zip_archive.Find(kClassesDex, error_msg));
+  std::unique_ptr<ZipEntry> zip_entry(zip_archive.Find(kClassesDex, error_msg));
   if (zip_entry.get() == NULL) {
     return nullptr;
   }
-  UniquePtr<MemMap> map(zip_entry->ExtractToMemMap(kClassesDex, error_msg));
+  std::unique_ptr<MemMap> map(zip_entry->ExtractToMemMap(kClassesDex, error_msg));
   if (map.get() == NULL) {
     *error_msg = StringPrintf("Failed to extract '%s' from '%s': %s", kClassesDex, location.c_str(),
                               error_msg->c_str());
     return nullptr;
   }
-  UniquePtr<const DexFile> dex_file(OpenMemory(location, zip_entry->GetCrc32(), map.release(),
+  std::unique_ptr<const DexFile> dex_file(OpenMemory(location, zip_entry->GetCrc32(), map.release(),
                                                error_msg));
   if (dex_file.get() == nullptr) {
     *error_msg = StringPrintf("Failed to open dex file '%s' from memory: %s", location.c_str(),
@@ -276,7 +276,7 @@
                                    uint32_t location_checksum,
                                    MemMap* mem_map, std::string* error_msg) {
   CHECK_ALIGNED(base, 4);  // various dex file structures must be word aligned
-  UniquePtr<DexFile> dex_file(new DexFile(base, size, location, location_checksum, mem_map));
+  std::unique_ptr<DexFile> dex_file(new DexFile(base, size, location, location_checksum, mem_map));
   if (!dex_file->Init(error_msg)) {
     return nullptr;
   } else {
@@ -838,7 +838,7 @@
                               void* context) const {
   DCHECK(code_item != nullptr);
   const byte* stream = GetDebugInfoStream(code_item);
-  UniquePtr<LocalInfo[]> local_in_reg(local_cb != NULL ?
+  std::unique_ptr<LocalInfo[]> local_in_reg(local_cb != NULL ?
                                       new LocalInfo[code_item->registers_size_] :
                                       NULL);
   if (stream != NULL) {
@@ -1005,8 +1005,8 @@
 }
 
 EncodedStaticFieldValueIterator::EncodedStaticFieldValueIterator(const DexFile& dex_file,
-                                                                 SirtRef<mirror::DexCache>* dex_cache,
-                                                                 SirtRef<mirror::ClassLoader>* class_loader,
+                                                                 Handle<mirror::DexCache>* dex_cache,
+                                                                 Handle<mirror::ClassLoader>* class_loader,
                                                                  ClassLinker* linker,
                                                                  const DexFile::ClassDef& class_def)
     : dex_file_(dex_file), dex_cache_(dex_cache), class_loader_(class_loader), linker_(linker),
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index c782ab1..1d5032d 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_DEX_FILE_H_
 #define ART_RUNTIME_DEX_FILE_H_
 
+#include <memory>
 #include <string>
 #include <vector>
 
@@ -28,7 +29,6 @@
 #include "mem_map.h"
 #include "modifiers.h"
 #include "safe_map.h"
-#include "UniquePtr.h"
 
 namespace art {
 
@@ -42,8 +42,7 @@
 }  // namespace mirror
 class ClassLinker;
 class Signature;
-template <typename T>
-class SirtRef;
+template<class T> class Handle;
 class StringPiece;
 class ZipArchive;
 
@@ -871,7 +870,7 @@
   const uint32_t location_checksum_;
 
   // Manages the underlying memory allocation.
-  UniquePtr<MemMap> mem_map_;
+  std::unique_ptr<MemMap> mem_map_;
 
   // Points to the header section.
   const Header* const header_;
@@ -1127,8 +1126,8 @@
 
 class EncodedStaticFieldValueIterator {
  public:
-  EncodedStaticFieldValueIterator(const DexFile& dex_file, SirtRef<mirror::DexCache>* dex_cache,
-                                  SirtRef<mirror::ClassLoader>* class_loader,
+  EncodedStaticFieldValueIterator(const DexFile& dex_file, Handle<mirror::DexCache>* dex_cache,
+                                  Handle<mirror::ClassLoader>* class_loader,
                                   ClassLinker* linker, const DexFile::ClassDef& class_def)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -1163,8 +1162,8 @@
   static const byte kEncodedValueArgShift = 5;
 
   const DexFile& dex_file_;
-  SirtRef<mirror::DexCache>* const dex_cache_;  // Dex cache to resolve literal objects.
-  SirtRef<mirror::ClassLoader>* const class_loader_;  // ClassLoader to resolve types.
+  Handle<mirror::DexCache>* const dex_cache_;  // Dex cache to resolve literal objects.
+  Handle<mirror::ClassLoader>* const class_loader_;  // ClassLoader to resolve types.
   ClassLinker* linker_;  // Linker to resolve literal objects.
   size_t array_size_;  // Size of array.
   size_t pos_;  // Current position.
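
EncodedStaticFieldValueIterator now borrows caller-owned handles: the constructor takes
Handle<mirror::DexCache>* and Handle<mirror::ClassLoader>* that must point into a StackHandleScope
outliving the iterator, and dex_file.h only needs the forward declaration
template<class T> class Handle; because nothing is dereferenced in the header. A hedged call-site
sketch (the constructor signature is from the hunks above; GetDexCache() on the class and the
surrounding variables are assumptions for illustration):

StackHandleScope<2> hs(self);
Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));      // assumed accessor
Handle<mirror::ClassLoader> class_loader(hs.NewHandle(loader));
// The iterator stores the pointers, so 'hs' must stay alive while 'it' is used.
EncodedStaticFieldValueIterator it(dex_file, &dex_cache, &class_loader, linker, class_def);
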
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 9b6859a..a814c34 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -16,7 +16,8 @@
 
 #include "dex_file.h"
 
-#include "UniquePtr.h"
+#include <memory>
+
 #include "common_runtime_test.h"
 
 namespace art {
@@ -90,7 +91,7 @@
     *dst_size = 0;
     return nullptr;
   }
-  UniquePtr<byte[]> dst(new byte[tmp.size()]);
+  std::unique_ptr<byte[]> dst(new byte[tmp.size()]);
   if (dst_size != nullptr) {
     *dst_size = tmp.size();
   } else {
@@ -131,11 +132,11 @@
   // decode base64
   CHECK(base64 != NULL);
   size_t length;
-  UniquePtr<byte[]> dex_bytes(DecodeBase64(base64, &length));
+  std::unique_ptr<byte[]> dex_bytes(DecodeBase64(base64, &length));
   CHECK(dex_bytes.get() != NULL);
 
   // write to provided file
-  UniquePtr<File> file(OS::CreateEmptyFile(location));
+  std::unique_ptr<File> file(OS::CreateEmptyFile(location));
   CHECK(file.get() != NULL);
   if (!file->WriteFully(dex_bytes.get(), length)) {
     PLOG(FATAL) << "Failed to write base64 as dex file";
@@ -154,7 +155,7 @@
 
 TEST_F(DexFileTest, Header) {
   ScratchFile tmp;
-  UniquePtr<const DexFile> raw(OpenDexFileBase64(kRawDex, tmp.GetFilename().c_str()));
+  std::unique_ptr<const DexFile> raw(OpenDexFileBase64(kRawDex, tmp.GetFilename().c_str()));
   ASSERT_TRUE(raw.get() != NULL);
 
   const DexFile::Header& header = raw->GetHeader();
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 528e112..a1c8c71 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -17,12 +17,12 @@
 #include "dex_file_verifier.h"
 
 #include <zlib.h>
+#include <memory>
 
 #include "base/stringprintf.h"
 #include "dex_file-inl.h"
 #include "leb128.h"
 #include "safe_map.h"
-#include "UniquePtr.h"
 #include "utf-inl.h"
 #include "utils.h"
 
@@ -68,7 +68,7 @@
 
 bool DexFileVerifier::Verify(const DexFile* dex_file, const byte* begin, size_t size,
                              const char* location, std::string* error_msg) {
-  UniquePtr<DexFileVerifier> verifier(new DexFileVerifier(dex_file, begin, size, location));
+  std::unique_ptr<DexFileVerifier> verifier(new DexFileVerifier(dex_file, begin, size, location));
   if (!verifier->Verify()) {
     *error_msg = verifier->FailureReason();
     return false;
@@ -652,7 +652,7 @@
     return false;
   }
 
-  UniquePtr<uint32_t[]> handler_offsets(new uint32_t[handlers_size]);
+  std::unique_ptr<uint32_t[]> handler_offsets(new uint32_t[handlers_size]);
   if (!CheckAndGetHandlerOffsets(code_item, &handler_offsets[0], handlers_size)) {
     return false;
   }
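
For reference, the pointer-ownership pattern is unchanged by the switch from ART's UniquePtr to std::unique_ptr; a minimal, self-contained sketch of the same shape as the handler_offsets buffer above (illustrative only, not part of this change):

    #include <cstddef>
    #include <cstdint>
    #include <memory>

    // Illustrative only: the array form of std::unique_ptr delete[]'s the
    // buffer automatically when the pointer goes out of scope, just as
    // UniquePtr did before.
    static void OwnScratchBuffer(size_t handlers_size) {
      std::unique_ptr<uint32_t[]> handler_offsets(new uint32_t[handlers_size]);
      if (handlers_size != 0) {
        handler_offsets[0] = 0u;  // operator[] is available on the array form.
      }
    }
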
diff --git a/runtime/dex_instruction-inl.h b/runtime/dex_instruction-inl.h
index 207b0b6..ad9491f 100644
--- a/runtime/dex_instruction-inl.h
+++ b/runtime/dex_instruction-inl.h
@@ -24,6 +24,68 @@
 //------------------------------------------------------------------------------
 // VRegA
 //------------------------------------------------------------------------------
+inline bool Instruction::HasVRegA() const {
+  switch (FormatOf(Opcode())) {
+    case k10t: return true;
+    case k10x: return true;
+    case k11n: return true;
+    case k11x: return true;
+    case k12x: return true;
+    case k20t: return true;
+    case k21c: return true;
+    case k21h: return true;
+    case k21s: return true;
+    case k21t: return true;
+    case k22b: return true;
+    case k22c: return true;
+    case k22s: return true;
+    case k22t: return true;
+    case k22x: return true;
+    case k23x: return true;
+    case k30t: return true;
+    case k31c: return true;
+    case k31i: return true;
+    case k31t: return true;
+    case k32x: return true;
+    case k35c: return true;
+    case k3rc: return true;
+    case k51l: return true;
+    default: return false;
+  }
+}
+
+inline int32_t Instruction::VRegA() const {
+  switch (FormatOf(Opcode())) {
+    case k10t: return VRegA_10t();
+    case k10x: return VRegA_10x();
+    case k11n: return VRegA_11n();
+    case k11x: return VRegA_11x();
+    case k12x: return VRegA_12x();
+    case k20t: return VRegA_20t();
+    case k21c: return VRegA_21c();
+    case k21h: return VRegA_21h();
+    case k21s: return VRegA_21s();
+    case k21t: return VRegA_21t();
+    case k22b: return VRegA_22b();
+    case k22c: return VRegA_22c();
+    case k22s: return VRegA_22s();
+    case k22t: return VRegA_22t();
+    case k22x: return VRegA_22x();
+    case k23x: return VRegA_23x();
+    case k30t: return VRegA_30t();
+    case k31c: return VRegA_31c();
+    case k31i: return VRegA_31i();
+    case k31t: return VRegA_31t();
+    case k32x: return VRegA_32x();
+    case k35c: return VRegA_35c();
+    case k3rc: return VRegA_3rc();
+    case k51l: return VRegA_51l();
+    default:
+      LOG(FATAL) << "Tried to access vA of instruction " << Name() << " which has no A operand.";
+      exit(EXIT_FAILURE);
+  }
+}
+
 inline int8_t Instruction::VRegA_10t(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k10t);
   return static_cast<int8_t>(InstAA(inst_data));
@@ -147,6 +209,66 @@
 //------------------------------------------------------------------------------
 // VRegB
 //------------------------------------------------------------------------------
+inline bool Instruction::HasVRegB() const {
+  switch (FormatOf(Opcode())) {
+    case k11n: return true;
+    case k12x: return true;
+    case k21c: return true;
+    case k21h: return true;
+    case k21s: return true;
+    case k21t: return true;
+    case k22b: return true;
+    case k22c: return true;
+    case k22s: return true;
+    case k22t: return true;
+    case k22x: return true;
+    case k23x: return true;
+    case k31c: return true;
+    case k31i: return true;
+    case k31t: return true;
+    case k32x: return true;
+    case k35c: return true;
+    case k3rc: return true;
+    case k51l: return true;
+    default: return false;
+  }
+}
+
+inline bool Instruction::HasWideVRegB() const {
+  return FormatOf(Opcode()) == k51l;
+}
+
+inline int32_t Instruction::VRegB() const {
+  switch (FormatOf(Opcode())) {
+    case k11n: return VRegB_11n();
+    case k12x: return VRegB_12x();
+    case k21c: return VRegB_21c();
+    case k21h: return VRegB_21h();
+    case k21s: return VRegB_21s();
+    case k21t: return VRegB_21t();
+    case k22b: return VRegB_22b();
+    case k22c: return VRegB_22c();
+    case k22s: return VRegB_22s();
+    case k22t: return VRegB_22t();
+    case k22x: return VRegB_22x();
+    case k23x: return VRegB_23x();
+    case k31c: return VRegB_31c();
+    case k31i: return VRegB_31i();
+    case k31t: return VRegB_31t();
+    case k32x: return VRegB_32x();
+    case k35c: return VRegB_35c();
+    case k3rc: return VRegB_3rc();
+    case k51l: return VRegB_51l();
+    default:
+      LOG(FATAL) << "Tried to access vB of instruction " << Name() << " which has no B operand.";
+      exit(EXIT_FAILURE);
+  }
+}
+
+inline uint64_t Instruction::WideVRegB() const {
+  return VRegB_51l();
+}
+
 inline int4_t Instruction::VRegB_11n(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k11n);
   return static_cast<int4_t>((InstB(inst_data) << 28) >> 28);
@@ -246,6 +368,34 @@
 //------------------------------------------------------------------------------
 // VRegC
 //------------------------------------------------------------------------------
+inline bool Instruction::HasVRegC() const {
+  switch (FormatOf(Opcode())) {
+    case k22b: return true;
+    case k22c: return true;
+    case k22s: return true;
+    case k22t: return true;
+    case k23x: return true;
+    case k35c: return true;
+    case k3rc: return true;
+    default: return false;
+  }
+}
+
+inline int32_t Instruction::VRegC() const {
+  switch (FormatOf(Opcode())) {
+    case k22b: return VRegC_22b();
+    case k22c: return VRegC_22c();
+    case k22s: return VRegC_22s();
+    case k22t: return VRegC_22t();
+    case k23x: return VRegC_23x();
+    case k35c: return VRegC_35c();
+    case k3rc: return VRegC_3rc();
+    default:
+      LOG(FATAL) << "Tried to access vC of instruction " << Name() << " which has no C operand.";
+      exit(EXIT_FAILURE);
+  }
+}
+
 inline int8_t Instruction::VRegC_22b() const {
   DCHECK_EQ(FormatOf(Opcode()), k22b);
   return static_cast<int8_t>(Fetch16(1) >> 8);
@@ -281,7 +431,11 @@
   return Fetch16(2);
 }
 
-inline void Instruction::GetArgs(uint32_t arg[5], uint16_t inst_data) const {
+inline bool Instruction::HasVarArgs() const {
+  return FormatOf(Opcode()) == k35c;
+}
+
+inline void Instruction::GetVarArgs(uint32_t arg[5], uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k35c);
 
   /*
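
For reference, the per-operand accessors introduced above (which replace whole-instruction decoding) can be driven generically; a minimal sketch assuming only the HasVRegA()/VRegA() family added in this patch (the helper itself is hypothetical, not part of the change):

    #include <ostream>

    #include "dex_instruction.h"

    // Illustrative only: print whichever operands an instruction actually
    // carries. HasWideVRegB() is checked before HasVRegB() because k51l
    // instructions satisfy both.
    static void DumpOperands(const art::Instruction* inst, std::ostream& os) {
      if (inst->HasVRegA()) {
        os << " vA=" << inst->VRegA();
      }
      if (inst->HasWideVRegB()) {
        os << " vB(wide)=" << inst->WideVRegB();
      } else if (inst->HasVRegB()) {
        os << " vB=" << inst->VRegB();
      }
      if (inst->HasVRegC()) {
        os << " vC=" << inst->VRegC();
      }
    }
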
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 7546245..0494f22 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -70,121 +70,6 @@
 #undef INSTRUCTION_SIZE
 };
 
-/*
- * Handy macros for helping decode instructions.
- */
-#define FETCH(_offset)      (insns[(_offset)])
-#define FETCH_uint32(_offset)   (fetch_uint32_impl((_offset), insns))
-#define INST_A(_insn)       (((uint16_t)(_insn) >> 8) & 0x0f)
-#define INST_B(_insn)       ((uint16_t)(_insn) >> 12)
-#define INST_AA(_insn)      ((_insn) >> 8)
-
-/* Helper for FETCH_uint32, above. */
-static inline uint32_t fetch_uint32_impl(uint32_t offset, const uint16_t* insns) {
-  return insns[offset] | ((uint32_t) insns[offset+1] << 16);
-}
-
-
-bool Instruction::HasVRegC() const {
-  switch (FormatOf(Opcode())) {
-    case k23x: return true;
-    case k35c: return true;
-    case k3rc: return true;
-    default: return false;
-  }
-}
-
-bool Instruction::HasVRegB() const {
-  switch (FormatOf(Opcode())) {
-    case k12x: return true;
-    case k22b: return true;
-    case k22c: return true;
-    case k22s: return true;
-    case k22t: return true;
-    case k22x: return true;
-    case k23x: return true;
-    case k32x: return true;
-    default: return false;
-  }
-}
-
-bool Instruction::HasVRegA() const {
-  switch (FormatOf(Opcode())) {
-    case k11n: return true;
-    case k11x: return true;
-    case k12x: return true;
-    case k21c: return true;
-    case k21h: return true;
-    case k21s: return true;
-    case k21t: return true;
-    case k22b: return true;
-    case k22c: return true;
-    case k22s: return true;
-    case k22t: return true;
-    case k22x: return true;
-    case k23x: return true;
-    case k31c: return true;
-    case k31i: return true;
-    case k31t: return true;
-    case k32x: return true;
-    case k51l: return true;
-    default: return false;
-  }
-}
-
-int32_t Instruction::VRegC() const {
-  switch (FormatOf(Opcode())) {
-    case k23x: return VRegC_23x();
-    case k35c: return VRegC_35c();
-    case k3rc: return VRegC_3rc();
-    default: LOG(FATAL) << "Tried to access vC of instruction " << Name() <<
-        " which has no C operand.";
-  }
-  return -1;
-}
-
-int32_t Instruction::VRegB() const {
-  switch (FormatOf(Opcode())) {
-    case k12x: return VRegB_12x();
-    case k22b: return VRegB_22b();
-    case k22c: return VRegB_22c();
-    case k22s: return VRegB_22s();
-    case k22t: return VRegB_22t();
-    case k22x: return VRegB_22x();
-    case k23x: return VRegB_23x();
-    case k32x: return VRegB_32x();
-    default: LOG(FATAL) << "Tried to access vB of instruction " << Name() <<
-        " which has no B operand.";
-  }
-  return -1;
-}
-
-int32_t Instruction::VRegA() const {
-  switch (FormatOf(Opcode())) {
-    case k11n: return VRegA_11n();
-    case k11x: return VRegA_11x();
-    case k12x: return VRegA_12x();
-    case k21c: return VRegA_21c();
-    case k21h: return VRegA_21h();
-    case k21s: return VRegA_21s();
-    case k21t: return VRegA_21t();
-    case k22b: return VRegA_22b();
-    case k22c: return VRegA_22c();
-    case k22s: return VRegA_22s();
-    case k22t: return VRegA_22t();
-    case k22x: return VRegA_22x();
-    case k23x: return VRegA_23x();
-    case k31c: return VRegA_31c();
-    case k31i: return VRegA_31i();
-    case k31t: return VRegA_31t();
-    case k32x: return VRegA_32x();
-    case k51l: return VRegA_51l();
-    default: LOG(FATAL) << "Tried to access vA of instruction " << Name() <<
-        " which has no A operand.";
-  }
-  return -1;
-}
-
 int32_t Instruction::GetTargetOffset() const {
   switch (FormatOf(Opcode())) {
     // Cases for conditional branches follow.
@@ -207,145 +92,6 @@
   return  FlagsOf(opcode) & Instruction::kContinue;
 }
 
-void Instruction::Decode(uint32_t &vA, uint32_t &vB, uint64_t &vB_wide, uint32_t &vC, uint32_t arg[]) const {
-  const uint16_t* insns = reinterpret_cast<const uint16_t*>(this);
-  uint16_t insn = *insns;
-  Code opcode = static_cast<Code>(insn & 0xFF);
-
-  switch (FormatOf(opcode)) {
-    case k10x:       // op
-      /* nothing to do; copy the AA bits out for the verifier */
-      vA = INST_AA(insn);
-      break;
-    case k12x:       // op vA, vB
-      vA = INST_A(insn);
-      vB = INST_B(insn);
-      break;
-    case k11n:       // op vA, #+B
-      vA = INST_A(insn);
-      vB = (int32_t) (INST_B(insn) << 28) >> 28;  // sign extend 4-bit value
-      break;
-    case k11x:       // op vAA
-      vA = INST_AA(insn);
-      break;
-    case k10t:       // op +AA
-      vA = (int8_t) INST_AA(insn);              // sign-extend 8-bit value
-      break;
-    case k20t:       // op +AAAA
-      vA = (int16_t) FETCH(1);                   // sign-extend 16-bit value
-      break;
-    case k21c:       // op vAA, thing@BBBB
-    case k22x:       // op vAA, vBBBB
-      vA = INST_AA(insn);
-      vB = FETCH(1);
-      break;
-    case k21s:       // op vAA, #+BBBB
-    case k21t:       // op vAA, +BBBB
-      vA = INST_AA(insn);
-      vB = (int16_t) FETCH(1);                   // sign-extend 16-bit value
-      break;
-    case k21h:       // op vAA, #+BBBB0000[00000000]
-      vA = INST_AA(insn);
-      /*
-       * The value should be treated as right-zero-extended, but we don't
-       * actually do that here. Among other things, we don't know if it's
-       * the top bits of a 32- or 64-bit value.
-       */
-      vB = FETCH(1);
-      break;
-    case k23x:       // op vAA, vBB, vCC
-      vA = INST_AA(insn);
-      vB = FETCH(1) & 0xff;
-      vC = FETCH(1) >> 8;
-      break;
-    case k22b:       // op vAA, vBB, #+CC
-      vA = INST_AA(insn);
-      vB = FETCH(1) & 0xff;
-      vC = (int8_t) (FETCH(1) >> 8);            // sign-extend 8-bit value
-      break;
-    case k22s:       // op vA, vB, #+CCCC
-    case k22t:       // op vA, vB, +CCCC
-      vA = INST_A(insn);
-      vB = INST_B(insn);
-      vC = (int16_t) FETCH(1);                   // sign-extend 16-bit value
-      break;
-    case k22c:       // op vA, vB, thing@CCCC
-      vA = INST_A(insn);
-      vB = INST_B(insn);
-      vC = FETCH(1);
-      break;
-    case k30t:       // op +AAAAAAAA
-      vA = FETCH_uint32(1);                     // signed 32-bit value
-      break;
-    case k31t:       // op vAA, +BBBBBBBB
-    case k31c:       // op vAA, string@BBBBBBBB
-      vA = INST_AA(insn);
-      vB = FETCH_uint32(1);                     // 32-bit value
-      break;
-    case k32x:       // op vAAAA, vBBBB
-      vA = FETCH(1);
-      vB = FETCH(2);
-      break;
-    case k31i:       // op vAA, #+BBBBBBBB
-      vA = INST_AA(insn);
-      vB = FETCH_uint32(1);                     // signed 32-bit value
-      break;
-    case k35c:       // op {vC, vD, vE, vF, vG}, thing@BBBB
-      {
-        /*
-         * Note that the fields mentioned in the spec don't appear in
-         * their "usual" positions here compared to most formats. This
-         * was done so that the field names for the argument count and
-         * reference index match between this format and the corresponding
-         * range formats (3rc and friends).
-         *
-         * Bottom line: The argument count is always in vA, and the
-         * method constant (or equivalent) is always in vB.
-         */
-        uint16_t regList;
-        int count;
-
-        vA = INST_B(insn);  // This is labeled A in the spec.
-        vB = FETCH(1);
-        regList = FETCH(2);
-
-        count = vA;
-
-        /*
-         * Copy the argument registers into the arg[] array, and
-         * also copy the first argument (if any) into vC. (The
-         * DecodedInstruction structure doesn't have separate
-         * fields for {vD, vE, vF, vG}, so there's no need to make
-         * copies of those.) Note that cases 5..2 fall through.
-         */
-        switch (count) {
-        case 5: arg[4] = INST_A(insn);
-        case 4: arg[3] = (regList >> 12) & 0x0f;
-        case 3: arg[2] = (regList >> 8) & 0x0f;
-        case 2: arg[1] = (regList >> 4) & 0x0f;
-        case 1: vC = arg[0] = regList & 0x0f; break;
-        case 0: break;  // Valid, but no need to do anything.
-        default:
-          LOG(ERROR) << "Invalid arg count in 35c (" << count << ")";
-          return;
-        }
-      }
-      break;
-    case k3rc:       // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB
-      vA = INST_AA(insn);
-      vB = FETCH(1);
-      vC = FETCH(2);
-        break;
-    case k51l:       // op vAA, #+BBBBBBBBBBBBBBBB
-      vA = INST_AA(insn);
-      vB_wide = FETCH_uint32(1) | ((uint64_t) FETCH_uint32(3) << 32);
-      break;
-    default:
-      LOG(ERROR) << "Can't decode unexpected format " << FormatOf(opcode) << " (op=" << opcode << ")";
-      return;
-  }
-}
-
 size_t Instruction::SizeInCodeUnitsComplexOpcode() const {
   const uint16_t* insns = reinterpret_cast<const uint16_t*>(this);
   // Handle special NOP encoded variable length sequences.
@@ -549,7 +295,7 @@
       break;
     case k35c: {
       uint32_t arg[5];
-      GetArgs(arg);
+      GetVarArgs(arg);
       switch (Opcode()) {
         case FILLED_NEW_ARRAY:
         {
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index 4352c4a..1ff5c19 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -118,13 +118,30 @@
   };
 
   enum Flags {
-    kBranch   = 0x01,  // conditional or unconditional branch
-    kContinue = 0x02,  // flow can continue to next statement
-    kSwitch   = 0x04,  // switch statement
-    kThrow    = 0x08,  // could cause an exception to be thrown
-    kReturn   = 0x10,  // returns, no additional statements
-    kInvoke   = 0x20,  // a flavor of invoke
-    kUnconditional = 0x40,  // unconditional branch
+    kBranch              = 0x000001,  // conditional or unconditional branch
+    kContinue            = 0x000002,  // flow can continue to next statement
+    kSwitch              = 0x000004,  // switch statement
+    kThrow               = 0x000008,  // could cause an exception to be thrown
+    kReturn              = 0x000010,  // returns, no additional statements
+    kInvoke              = 0x000020,  // a flavor of invoke
+    kUnconditional       = 0x000040,  // unconditional branch
+    kAdd                 = 0x000080,  // addition
+    kSubtract            = 0x000100,  // subtract
+    kMultiply            = 0x000200,  // multiply
+    kDivide              = 0x000400,  // division
+    kRemainder           = 0x000800,  // remainder
+    kAnd                 = 0x001000,  // and
+    kOr                  = 0x002000,  // or
+    kXor                 = 0x004000,  // xor
+    kShl                 = 0x008000,  // shl
+    kShr                 = 0x010000,  // shr
+    kUshr                = 0x020000,  // ushr
+    kCast                = 0x040000,  // cast
+    kStore               = 0x080000,  // store opcode
+    kLoad                = 0x100000,  // load opcode
+    kClobber             = 0x200000,  // clobbers memory in a big way (not just a write)
+    kRegCFieldOrConstant = 0x400000,  // is the third virtual register a field or literal constant (vC)
+    kRegBFieldOrConstant = 0x800000,  // is the second virtual register a field or literal constant (vB)
   };
 
   enum VerifyFlag {
@@ -151,8 +168,7 @@
     kVerifyError           = 0x80000,
   };
 
-  // Decodes this instruction, populating its arguments.
-  void Decode(uint32_t &vA, uint32_t &vB, uint64_t &vB_wide, uint32_t &vC, uint32_t arg[]) const;
+  static constexpr uint32_t kMaxVarArgRegs = 5;
 
   // Returns the size (in 2 byte code units) of this instruction.
   size_t SizeInCodeUnits() const {
@@ -313,6 +329,9 @@
   bool HasVRegB() const;
   int32_t VRegB() const;
 
+  bool HasWideVRegB() const;
+  uint64_t WideVRegB() const;
+
   int4_t VRegB_11n() const {
     return VRegB_11n(Fetch16(0));
   }
@@ -365,9 +384,10 @@
   uint16_t VRegC_3rc() const;
 
   // Fills the given array with the 'arg' array of the instruction.
-  void GetArgs(uint32_t args[5], uint16_t inst_data) const;
-  void GetArgs(uint32_t args[5]) const {
-    return GetArgs(args, Fetch16(0));
+  bool HasVarArgs() const;
+  void GetVarArgs(uint32_t args[kMaxVarArgRegs], uint16_t inst_data) const;
+  void GetVarArgs(uint32_t args[kMaxVarArgRegs]) const {
+    return GetVarArgs(args, Fetch16(0));
   }
 
   // Returns the opcode field of the instruction. The given "inst_data" parameter must be the first
@@ -549,23 +569,6 @@
 std::ostream& operator<<(std::ostream& os, const Instruction::Flags& flags);
 std::ostream& operator<<(std::ostream& os, const Instruction::VerifyFlag& vflags);
 
-/*
- * Holds the contents of a decoded instruction.
- */
-struct DecodedInstruction {
-  uint32_t vA;
-  uint32_t vB;
-  uint64_t vB_wide;        /* for k51l */
-  uint32_t vC;
-  uint32_t arg[5];         /* vC/D/E/F/G in invoke or filled-new-array */
-  Instruction::Code opcode;
-
-  explicit DecodedInstruction(const Instruction* inst) {
-    inst->Decode(vA, vB, vB_wide, vC, arg);
-    opcode = inst->Opcode();
-  }
-};
-
 }  // namespace art
 
 #endif  // ART_RUNTIME_DEX_INSTRUCTION_H_
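
For reference, the widened Flags bit set and the GetArgs → GetVarArgs rename are consumed the same way as the existing bits; a minimal sketch, assuming FlagsOf(Code) remains the static flag lookup already used in dex_instruction.cc and using the kMaxVarArgRegs constant added above (both helpers below are hypothetical, not part of the change):

    #include "dex_instruction.h"

    // Illustrative only: test one of the new per-operation flag bits.
    static bool IsAddition(art::Instruction::Code opcode) {
      return (art::Instruction::FlagsOf(opcode) & art::Instruction::kAdd) != 0;
    }

    // Illustrative only: read the register list of a k35c instruction. Per the
    // format, VRegA() holds the argument count and VRegB() the method (or
    // type) index.
    static uint32_t FirstInvokeArgOrZero(const art::Instruction* inst) {
      if (!inst->HasVarArgs()) {
        return 0u;
      }
      uint32_t args[art::Instruction::kMaxVarArgRegs];
      inst->GetVarArgs(args);
      return (inst->VRegA() > 0) ? args[0] : 0u;
    }
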
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index c2cd65a..f43e42f 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -36,27 +36,27 @@
   V(0x0F, RETURN, "return", k11x, false, kNone, kReturn, kVerifyRegA) \
   V(0x10, RETURN_WIDE, "return-wide", k11x, false, kNone, kReturn, kVerifyRegAWide) \
   V(0x11, RETURN_OBJECT, "return-object", k11x, false, kNone, kReturn, kVerifyRegA) \
-  V(0x12, CONST_4, "const/4", k11n, true, kNone, kContinue, kVerifyRegA) \
-  V(0x13, CONST_16, "const/16", k21s, true, kNone, kContinue, kVerifyRegA) \
-  V(0x14, CONST, "const", k31i, true, kNone, kContinue, kVerifyRegA) \
-  V(0x15, CONST_HIGH16, "const/high16", k21h, true, kNone, kContinue, kVerifyRegA) \
-  V(0x16, CONST_WIDE_16, "const-wide/16", k21s, true, kNone, kContinue, kVerifyRegAWide) \
-  V(0x17, CONST_WIDE_32, "const-wide/32", k31i, true, kNone, kContinue, kVerifyRegAWide) \
-  V(0x18, CONST_WIDE, "const-wide", k51l, true, kNone, kContinue, kVerifyRegAWide) \
-  V(0x19, CONST_WIDE_HIGH16, "const-wide/high16", k21h, true, kNone, kContinue, kVerifyRegAWide) \
+  V(0x12, CONST_4, "const/4", k11n, true, kNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x13, CONST_16, "const/16", k21s, true, kNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x14, CONST, "const", k31i, true, kNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x15, CONST_HIGH16, "const/high16", k21h, true, kNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x16, CONST_WIDE_16, "const-wide/16", k21s, true, kNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x17, CONST_WIDE_32, "const-wide/32", k31i, true, kNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x18, CONST_WIDE, "const-wide", k51l, true, kNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x19, CONST_WIDE_HIGH16, "const-wide/high16", k21h, true, kNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
   V(0x1A, CONST_STRING, "const-string", k21c, true, kStringRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBString) \
   V(0x1B, CONST_STRING_JUMBO, "const-string/jumbo", k31c, true, kStringRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBString) \
   V(0x1C, CONST_CLASS, "const-class", k21c, true, kTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBType) \
-  V(0x1D, MONITOR_ENTER, "monitor-enter", k11x, false, kNone, kContinue | kThrow, kVerifyRegA) \
-  V(0x1E, MONITOR_EXIT, "monitor-exit", k11x, false, kNone, kContinue | kThrow, kVerifyRegA) \
+  V(0x1D, MONITOR_ENTER, "monitor-enter", k11x, false, kNone, kContinue | kThrow | kClobber, kVerifyRegA) \
+  V(0x1E, MONITOR_EXIT, "monitor-exit", k11x, false, kNone, kContinue | kThrow | kClobber, kVerifyRegA) \
   V(0x1F, CHECK_CAST, "check-cast", k21c, true, kTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBType) \
   V(0x20, INSTANCE_OF, "instance-of", k22c, true, kTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \
   V(0x21, ARRAY_LENGTH, "array-length", k12x, true, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
-  V(0x22, NEW_INSTANCE, "new-instance", k21c, true, kTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBNewInstance) \
-  V(0x23, NEW_ARRAY, "new-array", k22c, true, kTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCNewArray) \
-  V(0x24, FILLED_NEW_ARRAY, "filled-new-array", k35c, false, kTypeRef, kContinue | kThrow, kVerifyRegBType | kVerifyVarArg) \
-  V(0x25, FILLED_NEW_ARRAY_RANGE, "filled-new-array/range", k3rc, false, kTypeRef, kContinue | kThrow, kVerifyRegBType | kVerifyVarArgRange) \
-  V(0x26, FILL_ARRAY_DATA, "fill-array-data", k31t, false, kNone, kContinue | kThrow, kVerifyRegA | kVerifyArrayData) \
+  V(0x22, NEW_INSTANCE, "new-instance", k21c, true, kTypeRef, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyRegBNewInstance) \
+  V(0x23, NEW_ARRAY, "new-array", k22c, true, kTypeRef, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyRegB | kVerifyRegCNewArray) \
+  V(0x24, FILLED_NEW_ARRAY, "filled-new-array", k35c, false, kTypeRef, kContinue | kThrow | kClobber, kVerifyRegBType | kVerifyVarArg) \
+  V(0x25, FILLED_NEW_ARRAY_RANGE, "filled-new-array/range", k3rc, false, kTypeRef, kContinue | kThrow | kClobber, kVerifyRegBType | kVerifyVarArgRange) \
+  V(0x26, FILL_ARRAY_DATA, "fill-array-data", k31t, false, kNone, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyArrayData) \
   V(0x27, THROW, "throw", k11x, false, kNone, kThrow, kVerifyRegA) \
   V(0x28, GOTO, "goto", k10t, false, kNone, kBranch | kUnconditional, kVerifyBranchTarget) \
   V(0x29, GOTO_16, "goto/16", k20t, false, kNone, kBranch | kUnconditional, kVerifyBranchTarget) \
@@ -86,48 +86,48 @@
   V(0x41, UNUSED_41, "unused-41", k10x, false, kUnknown, 0, kVerifyError) \
   V(0x42, UNUSED_42, "unused-42", k10x, false, kUnknown, 0, kVerifyError) \
   V(0x43, UNUSED_43, "unused-43", k10x, false, kUnknown, 0, kVerifyError) \
-  V(0x44, AGET, "aget", k23x, true, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x45, AGET_WIDE, "aget-wide", k23x, true, kNone, kContinue | kThrow, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
-  V(0x46, AGET_OBJECT, "aget-object", k23x, true, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x47, AGET_BOOLEAN, "aget-boolean", k23x, true, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x48, AGET_BYTE, "aget-byte", k23x, true, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x49, AGET_CHAR, "aget-char", k23x, true, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4A, AGET_SHORT, "aget-short", k23x, true, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4B, APUT, "aput", k23x, false, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4C, APUT_WIDE, "aput-wide", k23x, false, kNone, kContinue | kThrow, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
-  V(0x4D, APUT_OBJECT, "aput-object", k23x, false, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4E, APUT_BOOLEAN, "aput-boolean", k23x, false, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4F, APUT_BYTE, "aput-byte", k23x, false, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x50, APUT_CHAR, "aput-char", k23x, false, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x51, APUT_SHORT, "aput-short", k23x, false, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x52, IGET, "iget", k22c, true, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x53, IGET_WIDE, "iget-wide", k22c, true, kFieldRef, kContinue | kThrow, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
-  V(0x54, IGET_OBJECT, "iget-object", k22c, true, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x55, IGET_BOOLEAN, "iget-boolean", k22c, true, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x56, IGET_BYTE, "iget-byte", k22c, true, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x57, IGET_CHAR, "iget-char", k22c, true, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x58, IGET_SHORT, "iget-short", k22c, true, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x59, IPUT, "iput", k22c, false, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5A, IPUT_WIDE, "iput-wide", k22c, false, kFieldRef, kContinue | kThrow, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
-  V(0x5B, IPUT_OBJECT, "iput-object", k22c, false, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5C, IPUT_BOOLEAN, "iput-boolean", k22c, false, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5D, IPUT_BYTE, "iput-byte", k22c, false, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5E, IPUT_CHAR, "iput-char", k22c, false, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5F, IPUT_SHORT, "iput-short", k22c, false, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x60, SGET, "sget", k21c, true, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBField) \
-  V(0x61, SGET_WIDE, "sget-wide", k21c, true, kFieldRef, kContinue | kThrow, kVerifyRegAWide | kVerifyRegBField) \
-  V(0x62, SGET_OBJECT, "sget-object", k21c, true, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBField) \
-  V(0x63, SGET_BOOLEAN, "sget-boolean", k21c, true, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBField) \
-  V(0x64, SGET_BYTE, "sget-byte", k21c, true, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBField) \
-  V(0x65, SGET_CHAR, "sget-char", k21c, true, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBField) \
-  V(0x66, SGET_SHORT, "sget-short", k21c, true, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBField) \
-  V(0x67, SPUT, "sput", k21c, false, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBField) \
-  V(0x68, SPUT_WIDE, "sput-wide", k21c, false, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBField) \
-  V(0x69, SPUT_OBJECT, "sput-object", k21c, false, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBField) \
-  V(0x6A, SPUT_BOOLEAN, "sput-boolean", k21c, false, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBField) \
-  V(0x6B, SPUT_BYTE, "sput-byte", k21c, false, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBField) \
-  V(0x6C, SPUT_CHAR, "sput-char", k21c, false, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBField) \
-  V(0x6D, SPUT_SHORT, "sput-short", k21c, false, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBField) \
+  V(0x44, AGET, "aget", k23x, true, kNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x45, AGET_WIDE, "aget-wide", k23x, true, kNone, kContinue | kThrow | kLoad, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
+  V(0x46, AGET_OBJECT, "aget-object", k23x, true, kNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x47, AGET_BOOLEAN, "aget-boolean", k23x, true, kNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x48, AGET_BYTE, "aget-byte", k23x, true, kNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x49, AGET_CHAR, "aget-char", k23x, true, kNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4A, AGET_SHORT, "aget-short", k23x, true, kNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4B, APUT, "aput", k23x, false, kNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4C, APUT_WIDE, "aput-wide", k23x, false, kNone, kContinue | kThrow | kStore, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
+  V(0x4D, APUT_OBJECT, "aput-object", k23x, false, kNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4E, APUT_BOOLEAN, "aput-boolean", k23x, false, kNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4F, APUT_BYTE, "aput-byte", k23x, false, kNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x50, APUT_CHAR, "aput-char", k23x, false, kNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x51, APUT_SHORT, "aput-short", k23x, false, kNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x52, IGET, "iget", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x53, IGET_WIDE, "iget-wide", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
+  V(0x54, IGET_OBJECT, "iget-object", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x55, IGET_BOOLEAN, "iget-boolean", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x56, IGET_BYTE, "iget-byte", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x57, IGET_CHAR, "iget-char", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x58, IGET_SHORT, "iget-short", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x59, IPUT, "iput", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5A, IPUT_WIDE, "iput-wide", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
+  V(0x5B, IPUT_OBJECT, "iput-object", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5C, IPUT_BOOLEAN, "iput-boolean", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5D, IPUT_BYTE, "iput-byte", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5E, IPUT_CHAR, "iput-char", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5F, IPUT_SHORT, "iput-short", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x60, SGET, "sget", k21c, true, kFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x61, SGET_WIDE, "sget-wide", k21c, true, kFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
+  V(0x62, SGET_OBJECT, "sget-object", k21c, true, kFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x63, SGET_BOOLEAN, "sget-boolean", k21c, true, kFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x64, SGET_BYTE, "sget-byte", k21c, true, kFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x65, SGET_CHAR, "sget-char", k21c, true, kFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x66, SGET_SHORT, "sget-short", k21c, true, kFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x67, SPUT, "sput", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x68, SPUT_WIDE, "sput-wide", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x69, SPUT_OBJECT, "sput-object", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6A, SPUT_BOOLEAN, "sput-boolean", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6B, SPUT_BYTE, "sput-byte", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6C, SPUT_CHAR, "sput-char", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6D, SPUT_SHORT, "sput-short", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
   V(0x6E, INVOKE_VIRTUAL, "invoke-virtual", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \
   V(0x6F, INVOKE_SUPER, "invoke-super", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \
   V(0x70, INVOKE_DIRECT, "invoke-direct", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \
@@ -147,110 +147,110 @@
   V(0x7E, NOT_LONG, "not-long", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
   V(0x7F, NEG_FLOAT, "neg-float", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
   V(0x80, NEG_DOUBLE, "neg-double", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x81, INT_TO_LONG, "int-to-long", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegB) \
-  V(0x82, INT_TO_FLOAT, "int-to-float", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x83, INT_TO_DOUBLE, "int-to-double", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegB) \
-  V(0x84, LONG_TO_INT, "long-to-int", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegBWide) \
-  V(0x85, LONG_TO_FLOAT, "long-to-float", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegBWide) \
-  V(0x86, LONG_TO_DOUBLE, "long-to-double", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x87, FLOAT_TO_INT, "float-to-int", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x88, FLOAT_TO_LONG, "float-to-long", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegB) \
-  V(0x89, FLOAT_TO_DOUBLE, "float-to-double", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegB) \
-  V(0x8A, DOUBLE_TO_INT, "double-to-int", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegBWide) \
-  V(0x8B, DOUBLE_TO_LONG, "double-to-long", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x8C, DOUBLE_TO_FLOAT, "double-to-float", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegBWide) \
-  V(0x8D, INT_TO_BYTE, "int-to-byte", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x8E, INT_TO_CHAR, "int-to-char", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x8F, INT_TO_SHORT, "int-to-short", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x90, ADD_INT, "add-int", k23x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x91, SUB_INT, "sub-int", k23x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x92, MUL_INT, "mul-int", k23x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x93, DIV_INT, "div-int", k23x, true, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x94, REM_INT, "rem-int", k23x, true, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x95, AND_INT, "and-int", k23x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x96, OR_INT, "or-int", k23x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x97, XOR_INT, "xor-int", k23x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x98, SHL_INT, "shl-int", k23x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x99, SHR_INT, "shr-int", k23x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x9A, USHR_INT, "ushr-int", k23x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x9B, ADD_LONG, "add-long", k23x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9C, SUB_LONG, "sub-long", k23x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9D, MUL_LONG, "mul-long", k23x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9E, DIV_LONG, "div-long", k23x, true, kNone, kContinue | kThrow, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9F, REM_LONG, "rem-long", k23x, true, kNone, kContinue | kThrow, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA0, AND_LONG, "and-long", k23x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA1, OR_LONG, "or-long", k23x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA2, XOR_LONG, "xor-long", k23x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA3, SHL_LONG, "shl-long", k23x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
-  V(0xA4, SHR_LONG, "shr-long", k23x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
-  V(0xA5, USHR_LONG, "ushr-long", k23x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
-  V(0xA6, ADD_FLOAT, "add-float", k23x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xA7, SUB_FLOAT, "sub-float", k23x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xA8, MUL_FLOAT, "mul-float", k23x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xA9, DIV_FLOAT, "div-float", k23x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xAA, REM_FLOAT, "rem-float", k23x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xAB, ADD_DOUBLE, "add-double", k23x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAC, SUB_DOUBLE, "sub-double", k23x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAD, MUL_DOUBLE, "mul-double", k23x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAE, DIV_DOUBLE, "div-double", k23x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAF, REM_DOUBLE, "rem-double", k23x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xB0, ADD_INT_2ADDR, "add-int/2addr", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xB1, SUB_INT_2ADDR, "sub-int/2addr", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xB2, MUL_INT_2ADDR, "mul-int/2addr", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xB3, DIV_INT_2ADDR, "div-int/2addr", k12x, true, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
-  V(0xB4, REM_INT_2ADDR, "rem-int/2addr", k12x, true, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
-  V(0xB5, AND_INT_2ADDR, "and-int/2addr", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xB6, OR_INT_2ADDR, "or-int/2addr", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xB7, XOR_INT_2ADDR, "xor-int/2addr", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xB8, SHL_INT_2ADDR, "shl-int/2addr", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xB9, SHR_INT_2ADDR, "shr-int/2addr", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xBA, USHR_INT_2ADDR, "ushr-int/2addr", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xBB, ADD_LONG_2ADDR, "add-long/2addr", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBC, SUB_LONG_2ADDR, "sub-long/2addr", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBD, MUL_LONG_2ADDR, "mul-long/2addr", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBE, DIV_LONG_2ADDR, "div-long/2addr", k12x, true, kNone, kContinue | kThrow, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBF, REM_LONG_2ADDR, "rem-long/2addr", k12x, true, kNone, kContinue | kThrow, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC0, AND_LONG_2ADDR, "and-long/2addr", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC1, OR_LONG_2ADDR, "or-long/2addr", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC2, XOR_LONG_2ADDR, "xor-long/2addr", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC3, SHL_LONG_2ADDR, "shl-long/2addr", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegB) \
-  V(0xC4, SHR_LONG_2ADDR, "shr-long/2addr", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegB) \
-  V(0xC5, USHR_LONG_2ADDR, "ushr-long/2addr", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegB) \
-  V(0xC6, ADD_FLOAT_2ADDR, "add-float/2addr", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xC7, SUB_FLOAT_2ADDR, "sub-float/2addr", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xC8, MUL_FLOAT_2ADDR, "mul-float/2addr", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xC9, DIV_FLOAT_2ADDR, "div-float/2addr", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xCA, REM_FLOAT_2ADDR, "rem-float/2addr", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xCB, ADD_DOUBLE_2ADDR, "add-double/2addr", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCC, SUB_DOUBLE_2ADDR, "sub-double/2addr", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCD, MUL_DOUBLE_2ADDR, "mul-double/2addr", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCE, DIV_DOUBLE_2ADDR, "div-double/2addr", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCF, REM_DOUBLE_2ADDR, "rem-double/2addr", k12x, true, kNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xD0, ADD_INT_LIT16, "add-int/lit16", k22s, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xD1, RSUB_INT, "rsub-int", k22s, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xD2, MUL_INT_LIT16, "mul-int/lit16", k22s, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xD3, DIV_INT_LIT16, "div-int/lit16", k22s, true, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
-  V(0xD4, REM_INT_LIT16, "rem-int/lit16", k22s, true, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
-  V(0xD5, AND_INT_LIT16, "and-int/lit16", k22s, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xD6, OR_INT_LIT16, "or-int/lit16", k22s, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xD7, XOR_INT_LIT16, "xor-int/lit16", k22s, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xD8, ADD_INT_LIT8, "add-int/lit8", k22b, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xD9, RSUB_INT_LIT8, "rsub-int/lit8", k22b, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xDA, MUL_INT_LIT8, "mul-int/lit8", k22b, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xDB, DIV_INT_LIT8, "div-int/lit8", k22b, true, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
-  V(0xDC, REM_INT_LIT8, "rem-int/lit8", k22b, true, kNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
-  V(0xDD, AND_INT_LIT8, "and-int/lit8", k22b, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xDE, OR_INT_LIT8, "or-int/lit8", k22b, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xDF, XOR_INT_LIT8, "xor-int/lit8", k22b, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xE0, SHL_INT_LIT8, "shl-int/lit8", k22b, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xE1, SHR_INT_LIT8, "shr-int/lit8", k22b, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xE2, USHR_INT_LIT8, "ushr-int/lit8", k22b, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0xE3, IGET_QUICK, "iget-quick", k22c, true, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
-  V(0xE4, IGET_WIDE_QUICK, "iget-wide-quick", k22c, true, kFieldRef, kContinue | kThrow, kVerifyRegAWide | kVerifyRegB) \
-  V(0xE5, IGET_OBJECT_QUICK, "iget-object-quick", k22c, true, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
-  V(0xE6, IPUT_QUICK, "iput-quick", k22c, false, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
-  V(0xE7, IPUT_WIDE_QUICK, "iput-wide-quick", k22c, false, kFieldRef, kContinue | kThrow, kVerifyRegAWide | kVerifyRegB) \
-  V(0xE8, IPUT_OBJECT_QUICK, "iput-object-quick", k22c, false, kFieldRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
+  V(0x81, INT_TO_LONG, "int-to-long", k12x, true, kNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x82, INT_TO_FLOAT, "int-to-float", k12x, true, kNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x83, INT_TO_DOUBLE, "int-to-double", k12x, true, kNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x84, LONG_TO_INT, "long-to-int", k12x, true, kNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x85, LONG_TO_FLOAT, "long-to-float", k12x, true, kNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x86, LONG_TO_DOUBLE, "long-to-double", k12x, true, kNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x87, FLOAT_TO_INT, "float-to-int", k12x, true, kNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x88, FLOAT_TO_LONG, "float-to-long", k12x, true, kNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x89, FLOAT_TO_DOUBLE, "float-to-double", k12x, true, kNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x8A, DOUBLE_TO_INT, "double-to-int", k12x, true, kNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x8B, DOUBLE_TO_LONG, "double-to-long", k12x, true, kNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x8C, DOUBLE_TO_FLOAT, "double-to-float", k12x, true, kNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x8D, INT_TO_BYTE, "int-to-byte", k12x, true, kNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x8E, INT_TO_CHAR, "int-to-char", k12x, true, kNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x8F, INT_TO_SHORT, "int-to-short", k12x, true, kNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x90, ADD_INT, "add-int", k23x, true, kNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x91, SUB_INT, "sub-int", k23x, true, kNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x92, MUL_INT, "mul-int", k23x, true, kNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x93, DIV_INT, "div-int", k23x, true, kNone, kContinue | kThrow | kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x94, REM_INT, "rem-int", k23x, true, kNone, kContinue | kThrow | kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x95, AND_INT, "and-int", k23x, true, kNone, kContinue | kAnd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x96, OR_INT, "or-int", k23x, true, kNone, kContinue | kOr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x97, XOR_INT, "xor-int", k23x, true, kNone, kContinue | kXor, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x98, SHL_INT, "shl-int", k23x, true, kNone, kContinue | kShl, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x99, SHR_INT, "shr-int", k23x, true, kNone, kContinue | kShr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x9A, USHR_INT, "ushr-int", k23x, true, kNone, kContinue | kUshr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x9B, ADD_LONG, "add-long", k23x, true, kNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9C, SUB_LONG, "sub-long", k23x, true, kNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9D, MUL_LONG, "mul-long", k23x, true, kNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9E, DIV_LONG, "div-long", k23x, true, kNone, kContinue | kThrow | kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9F, REM_LONG, "rem-long", k23x, true, kNone, kContinue | kThrow | kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA0, AND_LONG, "and-long", k23x, true, kNone, kContinue | kAnd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA1, OR_LONG, "or-long", k23x, true, kNone, kContinue | kOr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA2, XOR_LONG, "xor-long", k23x, true, kNone, kContinue | kXor, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA3, SHL_LONG, "shl-long", k23x, true, kNone, kContinue | kShl, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
+  V(0xA4, SHR_LONG, "shr-long", k23x, true, kNone, kContinue | kShr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
+  V(0xA5, USHR_LONG, "ushr-long", k23x, true, kNone, kContinue | kUshr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
+  V(0xA6, ADD_FLOAT, "add-float", k23x, true, kNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xA7, SUB_FLOAT, "sub-float", k23x, true, kNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xA8, MUL_FLOAT, "mul-float", k23x, true, kNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xA9, DIV_FLOAT, "div-float", k23x, true, kNone, kContinue | kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xAA, REM_FLOAT, "rem-float", k23x, true, kNone, kContinue | kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xAB, ADD_DOUBLE, "add-double", k23x, true, kNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAC, SUB_DOUBLE, "sub-double", k23x, true, kNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAD, MUL_DOUBLE, "mul-double", k23x, true, kNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAE, DIV_DOUBLE, "div-double", k23x, true, kNone, kContinue | kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAF, REM_DOUBLE, "rem-double", k23x, true, kNone, kContinue | kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xB0, ADD_INT_2ADDR, "add-int/2addr", k12x, true, kNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB) \
+  V(0xB1, SUB_INT_2ADDR, "sub-int/2addr", k12x, true, kNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB) \
+  V(0xB2, MUL_INT_2ADDR, "mul-int/2addr", k12x, true, kNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB) \
+  V(0xB3, DIV_INT_2ADDR, "div-int/2addr", k12x, true, kNone, kContinue | kThrow | kDivide, kVerifyRegA | kVerifyRegB) \
+  V(0xB4, REM_INT_2ADDR, "rem-int/2addr", k12x, true, kNone, kContinue | kThrow | kRemainder, kVerifyRegA | kVerifyRegB) \
+  V(0xB5, AND_INT_2ADDR, "and-int/2addr", k12x, true, kNone, kContinue | kAnd, kVerifyRegA | kVerifyRegB) \
+  V(0xB6, OR_INT_2ADDR, "or-int/2addr", k12x, true, kNone, kContinue | kOr, kVerifyRegA | kVerifyRegB) \
+  V(0xB7, XOR_INT_2ADDR, "xor-int/2addr", k12x, true, kNone, kContinue | kXor, kVerifyRegA | kVerifyRegB) \
+  V(0xB8, SHL_INT_2ADDR, "shl-int/2addr", k12x, true, kNone, kContinue | kShl, kVerifyRegA | kVerifyRegB) \
+  V(0xB9, SHR_INT_2ADDR, "shr-int/2addr", k12x, true, kNone, kContinue | kShr, kVerifyRegA | kVerifyRegB) \
+  V(0xBA, USHR_INT_2ADDR, "ushr-int/2addr", k12x, true, kNone, kContinue | kUshr, kVerifyRegA | kVerifyRegB) \
+  V(0xBB, ADD_LONG_2ADDR, "add-long/2addr", k12x, true, kNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBC, SUB_LONG_2ADDR, "sub-long/2addr", k12x, true, kNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBD, MUL_LONG_2ADDR, "mul-long/2addr", k12x, true, kNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBE, DIV_LONG_2ADDR, "div-long/2addr", k12x, true, kNone, kContinue | kThrow | kDivide, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBF, REM_LONG_2ADDR, "rem-long/2addr", k12x, true, kNone, kContinue | kThrow | kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC0, AND_LONG_2ADDR, "and-long/2addr", k12x, true, kNone, kContinue | kAnd, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC1, OR_LONG_2ADDR, "or-long/2addr", k12x, true, kNone, kContinue | kOr, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC2, XOR_LONG_2ADDR, "xor-long/2addr", k12x, true, kNone, kContinue | kXor, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC3, SHL_LONG_2ADDR, "shl-long/2addr", k12x, true, kNone, kContinue | kShl, kVerifyRegAWide | kVerifyRegB) \
+  V(0xC4, SHR_LONG_2ADDR, "shr-long/2addr", k12x, true, kNone, kContinue | kShr, kVerifyRegAWide | kVerifyRegB) \
+  V(0xC5, USHR_LONG_2ADDR, "ushr-long/2addr", k12x, true, kNone, kContinue | kUshr, kVerifyRegAWide | kVerifyRegB) \
+  V(0xC6, ADD_FLOAT_2ADDR, "add-float/2addr", k12x, true, kNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB) \
+  V(0xC7, SUB_FLOAT_2ADDR, "sub-float/2addr", k12x, true, kNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB) \
+  V(0xC8, MUL_FLOAT_2ADDR, "mul-float/2addr", k12x, true, kNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB) \
+  V(0xC9, DIV_FLOAT_2ADDR, "div-float/2addr", k12x, true, kNone, kContinue | kDivide, kVerifyRegA | kVerifyRegB) \
+  V(0xCA, REM_FLOAT_2ADDR, "rem-float/2addr", k12x, true, kNone, kContinue | kRemainder, kVerifyRegA | kVerifyRegB) \
+  V(0xCB, ADD_DOUBLE_2ADDR, "add-double/2addr", k12x, true, kNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCC, SUB_DOUBLE_2ADDR, "sub-double/2addr", k12x, true, kNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCD, MUL_DOUBLE_2ADDR, "mul-double/2addr", k12x, true, kNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCE, DIV_DOUBLE_2ADDR, "div-double/2addr", k12x, true, kNone, kContinue | kDivide, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCF, REM_DOUBLE_2ADDR, "rem-double/2addr", k12x, true, kNone, kContinue | kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xD0, ADD_INT_LIT16, "add-int/lit16", k22s, true, kNone, kContinue | kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD1, RSUB_INT, "rsub-int", k22s, true, kNone, kContinue | kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD2, MUL_INT_LIT16, "mul-int/lit16", k22s, true, kNone, kContinue | kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD3, DIV_INT_LIT16, "div-int/lit16", k22s, true, kNone, kContinue | kThrow | kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD4, REM_INT_LIT16, "rem-int/lit16", k22s, true, kNone, kContinue | kThrow | kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD5, AND_INT_LIT16, "and-int/lit16", k22s, true, kNone, kContinue | kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD6, OR_INT_LIT16, "or-int/lit16", k22s, true, kNone, kContinue | kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD7, XOR_INT_LIT16, "xor-int/lit16", k22s, true, kNone, kContinue | kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD8, ADD_INT_LIT8, "add-int/lit8", k22b, true, kNone, kContinue | kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD9, RSUB_INT_LIT8, "rsub-int/lit8", k22b, true, kNone, kContinue | kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDA, MUL_INT_LIT8, "mul-int/lit8", k22b, true, kNone, kContinue | kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDB, DIV_INT_LIT8, "div-int/lit8", k22b, true, kNone, kContinue | kThrow | kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDC, REM_INT_LIT8, "rem-int/lit8", k22b, true, kNone, kContinue | kThrow | kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDD, AND_INT_LIT8, "and-int/lit8", k22b, true, kNone, kContinue | kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDE, OR_INT_LIT8, "or-int/lit8", k22b, true, kNone, kContinue | kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDF, XOR_INT_LIT8, "xor-int/lit8", k22b, true, kNone, kContinue | kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE0, SHL_INT_LIT8, "shl-int/lit8", k22b, true, kNone, kContinue | kShl | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE1, SHR_INT_LIT8, "shr-int/lit8", k22b, true, kNone, kContinue | kShr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE2, USHR_INT_LIT8, "ushr-int/lit8", k22b, true, kNone, kContinue | kUshr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE3, IGET_QUICK, "iget-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE4, IGET_WIDE_QUICK, "iget-wide-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB) \
+  V(0xE5, IGET_OBJECT_QUICK, "iget-object-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE6, IPUT_QUICK, "iput-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE7, IPUT_WIDE_QUICK, "iput-wide-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB) \
+  V(0xE8, IPUT_OBJECT_QUICK, "iput-object-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
   V(0xE9, INVOKE_VIRTUAL_QUICK, "invoke-virtual-quick", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyVarArg) \
   V(0xEA, INVOKE_VIRTUAL_RANGE_QUICK, "invoke-virtual/range-quick", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyVarArgRange) \
   V(0xEB, UNUSED_EB, "unused-eb", k10x, false, kUnknown, 0, kVerifyError) \
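For reference, a minimal self-contained sketch (not ART code; the list, names, and opcodes below are invented) of how an X-macro instruction list like the one above is typically consumed, once to build an enum and once to build a name table:

#include <cstdint>
#include <iostream>

// Each V(...) row expands once per consumer macro, mirroring the V(...) rows above.
#define DEMO_INSTRUCTION_LIST(V) \
  V(0xB0, ADD_INT_2ADDR, "add-int/2addr") \
  V(0xB1, SUB_INT_2ADDR, "sub-int/2addr")

enum class DemoOpcode : uint8_t {
#define MAKE_ENUM(opcode, cname, pname) cname = opcode,
  DEMO_INSTRUCTION_LIST(MAKE_ENUM)
#undef MAKE_ENUM
};

static const char* DemoOpcodeName(DemoOpcode op) {
  switch (op) {
#define MAKE_CASE(opcode, cname, pname) case DemoOpcode::cname: return pname;
    DEMO_INSTRUCTION_LIST(MAKE_CASE)
#undef MAKE_CASE
  }
  return "unknown";
}

int main() {
  std::cout << DemoOpcodeName(DemoOpcode::ADD_INT_2ADDR) << std::endl;  // prints add-int/2addr
  return 0;
}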
diff --git a/runtime/dex_instruction_visitor_test.cc b/runtime/dex_instruction_visitor_test.cc
index 8f42b0c..c5e63eb 100644
--- a/runtime/dex_instruction_visitor_test.cc
+++ b/runtime/dex_instruction_visitor_test.cc
@@ -17,8 +17,8 @@
 #include "dex_instruction_visitor.h"
 
 #include <iostream>
+#include <memory>
 
-#include "UniquePtr.h"
 #include "gtest/gtest.h"
 
 namespace art {
@@ -26,7 +26,7 @@
 class TestVisitor : public DexInstructionVisitor<TestVisitor> {};
 
 TEST(InstructionTest, Init) {
-  UniquePtr<TestVisitor> visitor(new TestVisitor);
+  std::unique_ptr<TestVisitor> visitor(new TestVisitor);
 }
 
 class CountVisitor : public DexInstructionVisitor<CountVisitor> {
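The UniquePtr to std::unique_ptr changes in this and the following hunks are mechanical. A minimal sketch, assuming nothing about the old UniquePtr wrapper beyond scoped ownership (Widget is an illustrative name):

#include <memory>

struct Widget { int value = 42; };

int main() {
  std::unique_ptr<Widget> w(new Widget);  // previously spelled UniquePtr<Widget>
  return (w->value == 42) ? 0 : 1;        // w is deleted automatically at scope exit
}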
diff --git a/runtime/dex_method_iterator.h b/runtime/dex_method_iterator.h
index 1975e48..806266d 100644
--- a/runtime/dex_method_iterator.h
+++ b/runtime/dex_method_iterator.h
@@ -140,7 +140,7 @@
   uint32_t class_def_index_;
   const DexFile::ClassDef* class_def_;
   const byte* class_data_;
-  UniquePtr<ClassDataItemIterator> it_;
+  std::unique_ptr<ClassDataItemIterator> it_;
   bool direct_method_;
 };
 
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 01ca60f..5d20096 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -127,7 +127,7 @@
 
 ElfFile* ElfFile::Open(File* file, bool writable, bool program_header_only,
                        std::string* error_msg) {
-  UniquePtr<ElfFile> elf_file(new ElfFile(file, writable, program_header_only));
+  std::unique_ptr<ElfFile> elf_file(new ElfFile(file, writable, program_header_only));
   if (!elf_file->Setup(error_msg)) {
     return nullptr;
   }
@@ -844,7 +844,7 @@
     if (program_header.p_vaddr == 0) {
       std::string reservation_name("ElfFile reservation for ");
       reservation_name += file_->GetPath();
-      UniquePtr<MemMap> reserve(MemMap::MapAnonymous(reservation_name.c_str(),
+      std::unique_ptr<MemMap> reserve(MemMap::MapAnonymous(reservation_name.c_str(),
                                                      NULL, GetLoadedSize(), PROT_NONE, false,
                                                      error_msg));
       if (reserve.get() == nullptr) {
@@ -884,7 +884,7 @@
                                 file_->GetPath().c_str());
       return false;
     }
-    UniquePtr<MemMap> segment(MemMap::MapFileAtAddress(p_vaddr,
+    std::unique_ptr<MemMap> segment(MemMap::MapFileAtAddress(p_vaddr,
                                                        program_header.p_memsz,
                                                        prot, flags, file_->Fd(),
                                                        program_header.p_offset,
@@ -999,7 +999,7 @@
 
   // Well, we need the whole file to do this.
   std::string error_msg;
-  UniquePtr<ElfFile> ptr(Open(const_cast<File*>(file_), false, false, &error_msg));
+  std::unique_ptr<ElfFile> ptr(Open(const_cast<File*>(file_), false, false, &error_msg));
   ElfFile& all = *ptr;
 
   // Do we have interesting sections?
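The ElfFile::Open hunks above follow a common factory pattern: guard the new object with std::unique_ptr, return nullptr if setup fails, release() on success. A hedged, self-contained sketch of that pattern (Parser and Setup are hypothetical names, not ART APIs):

#include <memory>
#include <string>

class Parser {
 public:
  // Returns a heap-allocated Parser on success, nullptr (with *error_msg set) on failure.
  static Parser* Open(bool should_fail, std::string* error_msg) {
    std::unique_ptr<Parser> parser(new Parser);
    if (!parser->Setup(should_fail, error_msg)) {
      return nullptr;          // parser is freed automatically here
    }
    return parser.release();   // caller takes ownership
  }

 private:
  bool Setup(bool should_fail, std::string* error_msg) {
    if (should_fail) {
      *error_msg = "setup failed";
      return false;
    }
    return true;
  }
};

int main() {
  std::string error_msg;
  std::unique_ptr<Parser> parser(Parser::Open(false, &error_msg));
  return parser != nullptr ? 0 : 1;
}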
diff --git a/runtime/elf_file.h b/runtime/elf_file.h
index d2a044e..6650acd 100644
--- a/runtime/elf_file.h
+++ b/runtime/elf_file.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_ELF_FILE_H_
 
 #include <map>
+#include <memory>
 #include <vector>
 
 #include "base/unix_file/fd_file.h"
@@ -25,7 +26,6 @@
 #include "elf_utils.h"
 #include "mem_map.h"
 #include "os.h"
-#include "UniquePtr.h"
 
 namespace art {
 
@@ -154,7 +154,7 @@
 
   // ELF header mapping. If program_header_only_ is false, will
   // actually point to the entire elf file.
-  UniquePtr<MemMap> map_;
+  std::unique_ptr<MemMap> map_;
   Elf32_Ehdr* header_;
   std::vector<MemMap*> segments_;
 
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index c81706f..39b2ec2 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -139,7 +139,7 @@
   self->ResetDefaultStackEnd(!explicit_overflow_check);  // Return to default stack size.
 }
 
-JValue InvokeProxyInvocationHandler(ScopedObjectAccessUnchecked& soa, const char* shorty,
+JValue InvokeProxyInvocationHandler(ScopedObjectAccessAlreadyRunnable& soa, const char* shorty,
                                     jobject rcvr_jobj, jobject interface_method_jobj,
                                     std::vector<jvalue>& args) {
   DCHECK(soa.Env()->IsInstanceOf(rcvr_jobj, WellKnownClasses::java_lang_reflect_Proxy));
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 05912bf..58b4286 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -30,7 +30,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/throwable.h"
 #include "object_utils.h"
-#include "sirt_ref-inl.h"
+#include "handle_scope-inl.h"
 #include "thread.h"
 
 namespace art {
@@ -72,7 +72,8 @@
     }
   }
   if (UNLIKELY(!klass->IsInitialized())) {
-    SirtRef<mirror::Class> sirt_klass(self, klass);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> h_klass(hs.NewHandle(klass));
     // EnsureInitialized (the class initializer) might cause a GC.
     // may cause us to suspend meaning that another thread may try to
     // change the allocator while we are stuck in the entrypoints of
@@ -82,11 +83,11 @@
     // has changed and to null-check the return value in case the
     // initialization fails.
     *slow_path = true;
-    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(sirt_klass, true, true)) {
+    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(h_klass, true, true)) {
       DCHECK(self->IsExceptionPending());
       return nullptr;  // Failure
     }
-    return sirt_klass.get();
+    return h_klass.Get();
   }
   return klass;
 }
@@ -96,7 +97,8 @@
                                                                                Thread* self, bool* slow_path)
     NO_THREAD_SAFETY_ANALYSIS {
   if (UNLIKELY(!klass->IsInitialized())) {
-    SirtRef<mirror::Class> sirt_class(self, klass);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> h_class(hs.NewHandle(klass));
     // EnsureInitialized (the class initializer) might cause a GC.
     // may cause us to suspend meaning that another thread may try to
     // change the allocator while we are stuck in the entrypoints of
@@ -106,11 +108,11 @@
     // has changed and to null-check the return value in case the
     // initialization fails.
     *slow_path = true;
-    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(sirt_class, true, true)) {
+    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(h_class, true, true)) {
       DCHECK(self->IsExceptionPending());
       return nullptr;  // Failure
     }
-    return sirt_class.get();
+    return h_class.Get();
   }
   return klass;
 }
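The SirtRef to StackHandleScope/Handle conversions in these hunks keep references valid across calls like EnsureInitialized that may trigger a moving GC. A toy illustration (not the ART API; ToyHandleScope and ToyMovingGc are made up) of why reading back through the handle matters:

#include <cassert>
#include <cstdio>

struct Object { int data; };

// A fixed-size scope of GC roots; NewHandle returns the slot's address,
// which stays valid even if the referenced object is moved.
struct ToyHandleScope {
  Object* slots[4] = {nullptr, nullptr, nullptr, nullptr};
  int count = 0;
  Object** NewHandle(Object* obj) {
    slots[count] = obj;
    return &slots[count++];
  }
};

// Pretends to be a moving collection: copies the payload and rewrites the root.
void ToyMovingGc(Object** handle, Object* new_location) {
  *new_location = **handle;
  *handle = new_location;
}

int main() {
  Object from{7};
  Object to{0};
  ToyHandleScope hs;
  Object** h = hs.NewHandle(&from);      // analogous to hs.NewHandle(klass)
  ToyMovingGc(h, &to);                   // analogous to EnsureInitialized triggering a GC
  assert(*h == &to && (*h)->data == 7);  // reading through the handle sees the new address
  std::printf("%d\n", (*h)->data);
  return 0;
}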
@@ -132,8 +134,7 @@
     if (klass == nullptr) {
       return nullptr;
     }
-    gc::Heap* heap = Runtime::Current()->GetHeap();
-    return klass->Alloc<kInstrumented>(self, heap->GetCurrentAllocator());
+    return klass->Alloc<kInstrumented>(self, Runtime::Current()->GetHeap()->GetCurrentAllocator());
   }
   DCHECK(klass != nullptr);
   return klass->Alloc<kInstrumented>(self, allocator_type);
@@ -155,9 +156,11 @@
       return nullptr;
     }
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    return klass->Alloc<kInstrumented>(self, heap->GetCurrentAllocator());
+    // Pass in false since the object cannot be finalizable.
+    return klass->Alloc<kInstrumented, false>(self, heap->GetCurrentAllocator());
   }
-  return klass->Alloc<kInstrumented>(self, allocator_type);
+  // Pass in false since the object cannot be finalizable.
+  return klass->Alloc<kInstrumented, false>(self, allocator_type);
 }
 
 // Given the context of a calling Method and an initialized class, create an instance.
@@ -169,7 +172,8 @@
                                                                            gc::AllocatorType allocator_type)
     NO_THREAD_SAFETY_ANALYSIS {
   DCHECK(klass != nullptr);
-  return klass->Alloc<kInstrumented>(self, allocator_type);
+  // Pass in false since the object cannot be finalizable.
+  return klass->Alloc<kInstrumented, false>(self, allocator_type);
 }
 
 
@@ -344,14 +348,14 @@
     if (LIKELY(fields_class->IsInitialized())) {
       return resolved_field;
     } else {
-      SirtRef<mirror::Class> sirt_class(self, fields_class);
-      if (LIKELY(class_linker->EnsureInitialized(sirt_class, true, true))) {
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> h_class(hs.NewHandle(fields_class));
+      if (LIKELY(class_linker->EnsureInitialized(h_class, true, true))) {
         // Otherwise let's ensure the class is initialized before resolving the field.
         return resolved_field;
-      } else {
-        DCHECK(self->IsExceptionPending());  // Throw exception and unwind
-        return nullptr;  // Failure.
       }
+      DCHECK(self->IsExceptionPending());  // Throw exception and unwind
+      return nullptr;  // Failure.
     }
   }
 }
@@ -381,30 +385,36 @@
 
 template<InvokeType type, bool access_check>
 static inline mirror::ArtMethod* FindMethodFromCode(uint32_t method_idx,
-                                                    mirror::Object* this_object,
-                                                    mirror::ArtMethod* referrer, Thread* self) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  SirtRef<mirror::Object> sirt_this(self, type == kStatic ? nullptr : this_object);
-  mirror::ArtMethod* resolved_method = class_linker->ResolveMethod(method_idx, referrer, type);
+                                                    mirror::Object** this_object,
+                                                    mirror::ArtMethod** referrer, Thread* self) {
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  mirror::ArtMethod* resolved_method = class_linker->GetResolvedMethod(method_idx, *referrer, type);
+  if (resolved_method == nullptr) {
+    StackHandleScope<1> hs(self);
+    mirror::Object* null_this = nullptr;
+    HandleWrapper<mirror::Object> h_this(
+        hs.NewHandleWrapper(type == kStatic ? &null_this : this_object));
+    resolved_method = class_linker->ResolveMethod(self, method_idx, referrer, type);
+  }
   if (UNLIKELY(resolved_method == nullptr)) {
     DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
     return nullptr;  // Failure.
-  } else if (UNLIKELY(sirt_this.get() == nullptr && type != kStatic)) {
+  } else if (UNLIKELY(*this_object == nullptr && type != kStatic)) {
     // Maintain interpreter-like semantics where NullPointerException is thrown
     // after potential NoSuchMethodError from class linker.
     ThrowLocation throw_location = self->GetCurrentLocationForThrow();
-    DCHECK(referrer == throw_location.GetMethod());
+    DCHECK_EQ(*referrer, throw_location.GetMethod());
     ThrowNullPointerExceptionForMethodAccess(throw_location, method_idx, type);
     return nullptr;  // Failure.
   } else if (access_check) {
     // Incompatible class change should have been handled in resolve method.
     if (UNLIKELY(resolved_method->CheckIncompatibleClassChange(type))) {
       ThrowIncompatibleClassChangeError(type, resolved_method->GetInvokeType(), resolved_method,
-                                        referrer);
+                                        *referrer);
       return nullptr;  // Failure.
     }
     mirror::Class* methods_class = resolved_method->GetDeclaringClass();
-    mirror::Class* referring_class = referrer->GetDeclaringClass();
+    mirror::Class* referring_class = (*referrer)->GetDeclaringClass();
     bool can_access_resolved_method =
         referring_class->CheckResolvedMethodAccess<type>(methods_class, resolved_method,
                                                          method_idx);
@@ -418,7 +428,7 @@
     case kDirect:
       return resolved_method;
     case kVirtual: {
-      mirror::ObjectArray<mirror::ArtMethod>* vtable = sirt_this->GetClass()->GetVTable();
+      mirror::ObjectArray<mirror::ArtMethod>* vtable = (*this_object)->GetClass()->GetVTable();
       uint16_t vtable_index = resolved_method->GetMethodIndex();
       if (access_check &&
           (vtable == nullptr || vtable_index >= static_cast<uint32_t>(vtable->GetLength()))) {
@@ -432,7 +442,7 @@
       return vtable->GetWithoutChecks(vtable_index);
     }
     case kSuper: {
-      mirror::Class* super_class = referrer->GetDeclaringClass()->GetSuperClass();
+      mirror::Class* super_class = (*referrer)->GetDeclaringClass()->GetSuperClass();
       uint16_t vtable_index = resolved_method->GetMethodIndex();
       mirror::ObjectArray<mirror::ArtMethod>* vtable;
       if (access_check) {
@@ -455,20 +465,19 @@
     }
     case kInterface: {
       uint32_t imt_index = resolved_method->GetDexMethodIndex() % ClassLinker::kImtSize;
-      mirror::ObjectArray<mirror::ArtMethod>* imt_table = sirt_this->GetClass()->GetImTable();
+      mirror::ObjectArray<mirror::ArtMethod>* imt_table = (*this_object)->GetClass()->GetImTable();
       mirror::ArtMethod* imt_method = imt_table->Get(imt_index);
       if (!imt_method->IsImtConflictMethod()) {
         return imt_method;
       } else {
         mirror::ArtMethod* interface_method =
-            sirt_this->GetClass()->FindVirtualMethodForInterface(resolved_method);
+            (*this_object)->GetClass()->FindVirtualMethodForInterface(resolved_method);
         if (UNLIKELY(interface_method == nullptr)) {
           ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(resolved_method,
-                                                                     sirt_this.get(), referrer);
+                                                                     *this_object, *referrer);
           return nullptr;  // Failure.
-        } else {
-          return interface_method;
         }
+        return interface_method;
       }
     }
     default:
@@ -481,8 +490,8 @@
 #define EXPLICIT_FIND_METHOD_FROM_CODE_TEMPLATE_DECL(_type, _access_check)                 \
   template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                       \
   mirror::ArtMethod* FindMethodFromCode<_type, _access_check>(uint32_t method_idx,         \
-                                                              mirror::Object* this_object, \
-                                                              mirror::ArtMethod* referrer, \
+                                                              mirror::Object** this_object, \
+                                                              mirror::ArtMethod** referrer, \
                                                               Thread* self)
 #define EXPLICIT_FIND_METHOD_FROM_CODE_TYPED_TEMPLATE_DECL(_type) \
     EXPLICIT_FIND_METHOD_FROM_CODE_TEMPLATE_DECL(_type, false);   \
@@ -623,12 +632,13 @@
   if (klass == referring_class && referrer->IsConstructor() && referrer->IsStatic()) {
     return klass;
   }
-  SirtRef<mirror::Class> sirt_class(self, klass);
-  if (!class_linker->EnsureInitialized(sirt_class, true, true)) {
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Class> h_class(hs.NewHandle(klass));
+  if (!class_linker->EnsureInitialized(h_class, true, true)) {
     CHECK(self->IsExceptionPending());
     return nullptr;  // Failure - Indicate to caller to deliver exception
   }
-  return sirt_class.get();
+  return h_class.Get();
 }
 
 extern void ThrowStackOverflowError(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -693,7 +703,7 @@
   }
 }
 
-JValue InvokeProxyInvocationHandler(ScopedObjectAccessUnchecked& soa, const char* shorty,
+JValue InvokeProxyInvocationHandler(ScopedObjectAccessAlreadyRunnable& soa, const char* shorty,
                                     jobject rcvr_jobj, jobject interface_art_method_jobj,
                                     std::vector<jvalue>& args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -731,11 +741,6 @@
   return reinterpret_cast<void*>(art_quick_to_interpreter_bridge);
 }
 
-extern "C" void art_quick_generic_jni_trampoline(mirror::ArtMethod*);
-static inline const void* GetQuickGenericJniTrampoline() {
-  return reinterpret_cast<void*>(art_quick_generic_jni_trampoline);
-}
-
 static inline const void* GetQuickToPortableBridge() {
   // TODO: quick to portable bridge. Bug: 8196384
   return GetQuickToInterpreterBridge();
@@ -757,10 +762,6 @@
   return class_linker->GetQuickImtConflictTrampoline();
 }
 
-static inline const void* GetQuickGenericJniTrampoline(ClassLinker* class_linker) {
-  return class_linker->GetQuickGenericJniTrampoline();
-}
-
 static inline const void* GetQuickToInterpreterBridgeTrampoline(ClassLinker* class_linker) {
   return class_linker->GetQuickToInterpreterBridgeTrampoline();
 }
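FindMethodFromCode now takes mirror::Object** and mirror::ArtMethod** so that a HandleWrapper can write a possibly relocated pointer back into the caller's local variable. A sketch of that write-back-on-destruction idea, with all names hypothetical rather than ART's:

#include <cassert>

struct Object { int id; };

// Copies the caller's pointer in at construction and writes the (possibly
// updated) value back when it goes out of scope.
class ToyHandleWrapper {
 public:
  explicit ToyHandleWrapper(Object** location) : location_(location), current_(*location) {}
  ~ToyHandleWrapper() { *location_ = current_; }
  void MoveTo(Object* new_location) { current_ = new_location; }

 private:
  Object** location_;
  Object* current_;
};

int main() {
  Object a{1};
  Object b{1};
  Object* this_object = &a;
  {
    ToyHandleWrapper h(&this_object);  // analogous to hs.NewHandleWrapper(this_object)
    h.MoveTo(&b);                      // a "GC" relocates the receiver
  }                                    // the caller's local now points at the new copy
  assert(this_object == &b);
  return 0;
}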
diff --git a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
index a0ba6b9..f2e2bf7 100644
--- a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
+++ b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
@@ -25,6 +25,7 @@
 
 namespace art {
 
+// TODO: Make the MethodHelper here compaction safe.
 extern "C" void artInterpreterToCompiledCodeBridge(Thread* self, MethodHelper& mh,
                                                    const DexFile::CodeItem* code_item,
                                                    ShadowFrame* shadow_frame, JValue* result) {
@@ -34,14 +35,17 @@
     mirror::Class* declaringClass = method->GetDeclaringClass();
     if (UNLIKELY(!declaringClass->IsInitializing())) {
       self->PushShadowFrame(shadow_frame);
-      SirtRef<mirror::Class> sirt_c(self, declaringClass);
-      if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(sirt_c, true, true))) {
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> h_class(hs.NewHandle(declaringClass));
+      if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(h_class, true, true))) {
         self->PopShadowFrame();
         DCHECK(self->IsExceptionPending());
         return;
       }
       self->PopShadowFrame();
-      CHECK(sirt_c->IsInitializing());
+      CHECK(h_class->IsInitializing());
+      // Reload from the shadow frame in case the method moved; this is faster than adding a handle.
+      method = shadow_frame->GetMethod();
     }
   }
   uint16_t arg_offset = (code_item == NULL) ? 0 : code_item->registers_size_ - code_item->ins_size_;
diff --git a/runtime/entrypoints/portable/portable_invoke_entrypoints.cc b/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
index d34b097..3a898e8 100644
--- a/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
@@ -23,17 +23,20 @@
 
 template<InvokeType type, bool access_check>
 mirror::ArtMethod* FindMethodHelper(uint32_t method_idx, mirror::Object* this_object,
-                                    mirror::ArtMethod* caller_method, Thread* thread) {
+                                    mirror::ArtMethod* caller_method, Thread* self) {
   mirror::ArtMethod* method = FindMethodFast(method_idx, this_object, caller_method,
                                              access_check, type);
   if (UNLIKELY(method == NULL)) {
-    method = FindMethodFromCode<type, access_check>(method_idx, this_object, caller_method, thread);
+    // Note: This can cause thread suspension.
+    self->AssertThreadSuspensionIsAllowable();
+    method = FindMethodFromCode<type, access_check>(method_idx, &this_object, &caller_method,
+                                                    self);
     if (UNLIKELY(method == NULL)) {
-      CHECK(thread->IsExceptionPending());
+      CHECK(self->IsExceptionPending());
       return 0;  // failure
     }
   }
-  DCHECK(!thread->IsExceptionPending());
+  DCHECK(!self->IsExceptionPending());
   const void* code = method->GetEntryPointFromPortableCompiledCode();
 
   // When we return, the caller will branch to this address, so it had better not be 0!
diff --git a/runtime/entrypoints/portable/portable_jni_entrypoints.cc b/runtime/entrypoints/portable/portable_jni_entrypoints.cc
index 17ad4d0..3e7b30a 100644
--- a/runtime/entrypoints/portable/portable_jni_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_jni_entrypoints.cc
@@ -37,7 +37,8 @@
   return art_portable_jni_method_start(self);
 }
 
-static void PopLocalReferences(uint32_t saved_local_ref_cookie, Thread* self) {
+static void PopLocalReferences(uint32_t saved_local_ref_cookie, Thread* self)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   JNIEnvExt* env = self->GetJniEnv();
   env->locals.SetSegmentState(env->local_ref_cookie);
   env->local_ref_cookie = saved_local_ref_cookie;
diff --git a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
index f1b15b5..3756f47 100644
--- a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
@@ -214,8 +214,9 @@
 
     if (method->IsStatic() && !method->GetDeclaringClass()->IsInitializing()) {
       // Ensure static method's class is initialized.
-      SirtRef<mirror::Class> sirt_c(self, method->GetDeclaringClass());
-      if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(sirt_c, true, true)) {
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
+      if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(h_class, true, true)) {
         DCHECK(Thread::Current()->IsExceptionPending());
         self->PopManagedStackFragment(fragment);
         return 0;
@@ -316,11 +317,11 @@
 // Lazily resolve a method for portable. Called by stub code.
 extern "C" const void* artPortableResolutionTrampoline(mirror::ArtMethod* called,
                                                        mirror::Object* receiver,
-                                                       Thread* thread,
+                                                       Thread* self,
                                                        mirror::ArtMethod** called_addr)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   uint32_t dex_pc;
-  mirror::ArtMethod* caller = thread->GetCurrentMethod(&dex_pc);
+  mirror::ArtMethod* caller = self->GetCurrentMethod(&dex_pc);
 
   ClassLinker* linker = Runtime::Current()->GetClassLinker();
   InvokeType invoke_type;
@@ -378,7 +379,7 @@
         is_range = true;
     }
     uint32_t dex_method_idx = (is_range) ? instr->VRegB_3rc() : instr->VRegB_35c();
-    called = linker->ResolveMethod(dex_method_idx, caller, invoke_type);
+    called = linker->ResolveMethod(Thread::Current(), dex_method_idx, &caller, invoke_type);
     // Incompatible class change should have been handled in resolve method.
     CHECK(!called->CheckIncompatibleClassChange(invoke_type));
     // Refine called method based on receiver.
@@ -394,9 +395,10 @@
     CHECK(!called->CheckIncompatibleClassChange(invoke_type));
   }
   const void* code = nullptr;
-  if (LIKELY(!thread->IsExceptionPending())) {
+  if (LIKELY(!self->IsExceptionPending())) {
     // Ensure that the called method's class is initialized.
-    SirtRef<mirror::Class> called_class(thread, called->GetDeclaringClass());
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> called_class(hs.NewHandle(called->GetDeclaringClass()));
     linker->EnsureInitialized(called_class, true, true);
     if (LIKELY(called_class->IsInitialized())) {
       code = called->GetEntryPointFromPortableCompiledCode();
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 2d5c07d..c38a595 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -242,9 +242,11 @@
 extern "C" int artSet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint64_t new_value,
                                         Thread* self, mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::ArtMethod* callee_save = Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsOnly);
-  mirror::ArtMethod* referrer =
-      sp[callee_save->GetFrameSizeInBytes() / sizeof(mirror::ArtMethod*)];
+  Runtime* runtime = Runtime::Current();
+  mirror::ArtMethod* callee_save = runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
+  uint32_t frame_size =
+      runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly).FrameSizeInBytes();
+  mirror::ArtMethod* referrer = sp[frame_size / sizeof(mirror::ArtMethod*)];
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
                                           sizeof(int64_t));
   if (LIKELY(field != NULL  && obj != NULL)) {
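The artSet64InstanceFromCode change computes the referrer slot from the callee-save frame size rather than asking the callee-save method object. A back-of-the-envelope sketch of the indexing (the frame size and marker below are invented for illustration):

#include <cstdint>
#include <cstdio>

int main() {
  constexpr uint32_t kFrameSizeInBytes = 32;                  // invented kRefsOnly frame size
  constexpr uint32_t kSlotSize = sizeof(void*);               // one stack slot
  void* fake_stack[16] = {};
  void* caller_marker = &fake_stack;                          // stand-in for the caller's ArtMethod*
  fake_stack[kFrameSizeInBytes / kSlotSize] = caller_marker;  // where the stub stored it
  void** sp = fake_stack;
  void* referrer = sp[kFrameSizeInBytes / kSlotSize];         // same indexing as the hunk above
  std::printf("%s\n", referrer == caller_marker ? "found caller" : "miss");
  return 0;
}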
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index 60c5377..11a4b3b 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -48,10 +48,13 @@
   //       stack.
   // Be aware the store below may well stomp on an incoming argument.
   Locks::mutator_lock_->AssertSharedHeld(self);
-  mirror::ArtMethod* callee_save = Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsOnly);
+  Runtime* runtime = Runtime::Current();
+  mirror::ArtMethod* callee_save = runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
   *sp = callee_save;
+  uint32_t return_pc_offset = callee_save->GetReturnPcOffsetInBytes(
+      runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly).FrameSizeInBytes());
   uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp) +
-                                                      callee_save->GetReturnPcOffsetInBytes());
+                                                      return_pc_offset);
   CHECK_EQ(*return_pc, 0U);
   self->SetTopOfStack(sp, 0);
   self->VerifyStack();
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index 116957d..5d36b4c 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -61,11 +61,12 @@
   }
 }
 
-static void PopLocalReferences(uint32_t saved_local_ref_cookie, Thread* self) {
+static void PopLocalReferences(uint32_t saved_local_ref_cookie, Thread* self)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   JNIEnvExt* env = self->GetJniEnv();
   env->locals.SetSegmentState(env->local_ref_cookie);
   env->local_ref_cookie = saved_local_ref_cookie;
-  self->PopSirt();
+  self->PopHandleScope();
 }
 
 extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self) {
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 680ffbe..554bff4 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -489,8 +489,9 @@
 
     if (method->IsStatic() && !method->GetDeclaringClass()->IsInitializing()) {
       // Ensure static method's class is initialized.
-      SirtRef<mirror::Class> sirt_c(self, method->GetDeclaringClass());
-      if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(sirt_c, true, true)) {
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
+      if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(h_class, true, true)) {
         DCHECK(Thread::Current()->IsExceptionPending()) << PrettyMethod(method);
         self->PopManagedStackFragment(fragment);
         return 0;
@@ -522,7 +523,7 @@
   ScopedObjectAccessUnchecked* const soa_;
   std::vector<jvalue>* const args_;
   // References which we must update when exiting in case the GC moved the objects.
-  std::vector<std::pair<jobject, StackReference<mirror::Object>*> > references_;
+  std::vector<std::pair<jobject, StackReference<mirror::Object>*>> references_;
 
   DISALLOW_COPY_AND_ASSIGN(BuildQuickArgumentVisitor);
 };
@@ -639,7 +640,7 @@
  private:
   ScopedObjectAccessUnchecked* const soa_;
   // References which we must update when exiting in case the GC moved the objects.
-  std::vector<std::pair<jobject, StackReference<mirror::Object>*> > references_;
+  std::vector<std::pair<jobject, StackReference<mirror::Object>*>> references_;
   DISALLOW_COPY_AND_ASSIGN(RememberForGcArgumentVisitor);
 };
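The "> >" to ">>" changes in these visitors are purely syntactic: C++11 parses consecutive closing angle brackets in nested template arguments, so the separating space is no longer required. For example:

#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<int, int> > with_space;    // pre-C++11 spelling
  std::vector<std::pair<int, int>> without_space;  // C++11 spelling used in the hunks above
  return static_cast<int>(with_space.size() + without_space.size());
}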
 
@@ -754,10 +755,12 @@
   self->EndAssertNoThreadSuspension(old_cause);
   bool virtual_or_interface = invoke_type == kVirtual || invoke_type == kInterface;
   // Resolve method filling in dex cache.
-  if (called->IsRuntimeMethod()) {
-    SirtRef<mirror::Object> sirt_receiver(soa.Self(), virtual_or_interface ? receiver : nullptr);
-    called = linker->ResolveMethod(dex_method_idx, caller, invoke_type);
-    receiver = sirt_receiver.get();
+  if (UNLIKELY(called->IsRuntimeMethod())) {
+    StackHandleScope<1> hs(self);
+    mirror::Object* dummy = nullptr;
+    HandleWrapper<mirror::Object> h_receiver(
+        hs.NewHandleWrapper(virtual_or_interface ? &receiver : &dummy));
+    called = linker->ResolveMethod(self, dex_method_idx, &caller, invoke_type);
   }
   const void* code = NULL;
   if (LIKELY(!self->IsExceptionPending())) {
@@ -767,11 +770,18 @@
     if (virtual_or_interface) {
       // Refine called method based on receiver.
       CHECK(receiver != nullptr) << invoke_type;
+
+      mirror::ArtMethod* orig_called = called;
       if (invoke_type == kVirtual) {
         called = receiver->GetClass()->FindVirtualMethodForVirtual(called);
       } else {
         called = receiver->GetClass()->FindVirtualMethodForInterface(called);
       }
+
+      CHECK(called != nullptr) << PrettyMethod(orig_called) << " "
+                               << PrettyTypeOf(receiver) << " "
+                               << invoke_type << " " << orig_called->GetVtableIndex();
+
       // We came here because of sharpening. Ensure the dex cache is up-to-date on the method index
       // of the sharpened method.
       if (called->GetDexCacheResolvedMethods() == caller->GetDexCacheResolvedMethods()) {
@@ -789,7 +799,8 @@
       }
     }
     // Ensure that the called method's class is initialized.
-    SirtRef<mirror::Class> called_class(soa.Self(), called->GetDeclaringClass());
+    StackHandleScope<1> hs(soa.Self());
+    Handle<mirror::Class> called_class(hs.NewHandle(called->GetDeclaringClass()));
     linker->EnsureInitialized(called_class, true, true);
     if (LIKELY(called_class->IsInitialized())) {
       code = called->GetEntryPointFromQuickCompiledCode();
@@ -850,10 +861,10 @@
  *
  * void PushStack(uintptr_t): Push a value to the stack.
  *
- * uintptr_t PushSirt(mirror::Object* ref): Add a reference to the Sirt. This _will_ have nullptr,
+ * uintptr_t PushHandle(mirror::Object* ref): Add a reference to the HandleScope. This _will_ have nullptr,
  *                                          as this might be important for null initialization.
  *                                          Must return the jobject, that is, the reference to the
- *                                          entry in the Sirt (nullptr if necessary).
+ *                                          entry in the HandleScope (nullptr if necessary).
  *
  */
 template <class T> class BuildGenericJniFrameStateMachine {
@@ -949,18 +960,18 @@
   }
 
 
-  bool HaveSirtGpr() {
+  bool HaveHandleScopeGpr() {
     return gpr_index_ > 0;
   }
 
-  void AdvanceSirt(mirror::Object* ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    uintptr_t sirtRef = PushSirt(ptr);
-    if (HaveSirtGpr()) {
+  void AdvanceHandleScope(mirror::Object* ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    uintptr_t handle = PushHandle(ptr);
+    if (HaveHandleScopeGpr()) {
       gpr_index_--;
-      PushGpr(sirtRef);
+      PushGpr(handle);
     } else {
       stack_entries_++;
-      PushStack(sirtRef);
+      PushStack(handle);
       gpr_index_ = 0;
     }
   }
@@ -1140,8 +1151,8 @@
   void PushStack(uintptr_t val) {
     delegate_->PushStack(val);
   }
-  uintptr_t PushSirt(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return delegate_->PushSirt(ref);
+  uintptr_t PushHandle(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return delegate_->PushHandle(ref);
   }
 
   uint32_t gpr_index_;      // Number of free GPRs
@@ -1153,7 +1164,7 @@
 
 class ComputeGenericJniFrameSize FINAL {
  public:
-  ComputeGenericJniFrameSize() : num_sirt_references_(0), num_stack_entries_(0) {}
+  ComputeGenericJniFrameSize() : num_handle_scope_references_(0), num_stack_entries_(0) {}
 
   uint32_t GetStackSize() {
     return num_stack_entries_ * sizeof(uintptr_t);
@@ -1161,7 +1172,7 @@
 
   // WARNING: After this, *sp won't be pointing to the method anymore!
   void ComputeLayout(mirror::ArtMethod*** m, bool is_static, const char* shorty, uint32_t shorty_len,
-                     void* sp, StackIndirectReferenceTable** table, uint32_t* sirt_entries,
+                     void* sp, HandleScope** table, uint32_t* handle_scope_entries,
                      uintptr_t** start_stack, uintptr_t** start_gpr, uint32_t** start_fpr,
                      void** code_return, size_t* overall_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -1172,17 +1183,17 @@
     uint8_t* sp8 = reinterpret_cast<uint8_t*>(sp);
 
     // First, fix up the layout of the callee-save frame.
-    // We have to squeeze in the Sirt, and relocate the method pointer.
+    // We have to squeeze in the HandleScope, and relocate the method pointer.
 
     // "Free" the slot for the method.
     sp8 += kPointerSize;
 
-    // Add the Sirt.
-    *sirt_entries = num_sirt_references_;
-    size_t sirt_size = StackIndirectReferenceTable::GetAlignedSirtSize(num_sirt_references_);
-    sp8 -= sirt_size;
-    *table = reinterpret_cast<StackIndirectReferenceTable*>(sp8);
-    (*table)->SetNumberOfReferences(num_sirt_references_);
+    // Add the HandleScope.
+    *handle_scope_entries = num_handle_scope_references_;
+    size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSize(num_handle_scope_references_);
+    sp8 -= handle_scope_size;
+    *table = reinterpret_cast<HandleScope*>(sp8);
+    (*table)->SetNumberOfReferences(num_handle_scope_references_);
 
     // Add a slot for the method pointer, and fill it. Fix the pointer-pointer given to us.
     sp8 -= kPointerSize;
@@ -1192,8 +1203,8 @@
 
     // Reference cookie and padding
     sp8 -= 8;
-    // Store Sirt size
-    *reinterpret_cast<uint32_t*>(sp8) = static_cast<uint32_t>(sirt_size & 0xFFFFFFFF);
+    // Store HandleScope size
+    *reinterpret_cast<uint32_t*>(sp8) = static_cast<uint32_t>(handle_scope_size & 0xFFFFFFFF);
 
     // Next comes the native call stack.
     sp8 -= GetStackSize();
@@ -1222,7 +1233,7 @@
     *(reinterpret_cast<uint8_t**>(sp8)) = method_pointer;
   }
 
-  void ComputeSirtOffset() { }  // nothing to do, static right now
+  void ComputeHandleScopeOffset() { }  // nothing to do, static right now
 
   void ComputeAll(bool is_static, const char* shorty, uint32_t shorty_len)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -1232,13 +1243,13 @@
     sm.AdvancePointer(nullptr);
 
     // Class object or this as first argument
-    sm.AdvanceSirt(reinterpret_cast<mirror::Object*>(0x12345678));
+    sm.AdvanceHandleScope(reinterpret_cast<mirror::Object*>(0x12345678));
 
     for (uint32_t i = 1; i < shorty_len; ++i) {
       Primitive::Type cur_type_ = Primitive::GetType(shorty[i]);
       switch (cur_type_) {
         case Primitive::kPrimNot:
-          sm.AdvanceSirt(reinterpret_cast<mirror::Object*>(0x12345678));
+          sm.AdvanceHandleScope(reinterpret_cast<mirror::Object*>(0x12345678));
           break;
 
         case Primitive::kPrimBoolean:
@@ -1281,13 +1292,13 @@
     // counting is already done in the superclass
   }
 
-  uintptr_t PushSirt(mirror::Object* /* ptr */) {
-    num_sirt_references_++;
+  uintptr_t PushHandle(mirror::Object* /* ptr */) {
+    num_handle_scope_references_++;
     return reinterpret_cast<uintptr_t>(nullptr);
   }
 
  private:
-  uint32_t num_sirt_references_;
+  uint32_t num_handle_scope_references_;
   uint32_t num_stack_entries_;
 };
 
@@ -1299,26 +1310,32 @@
                               uint32_t shorty_len, Thread* self) :
       QuickArgumentVisitor(*sp, is_static, shorty, shorty_len), sm_(this) {
     ComputeGenericJniFrameSize fsc;
-    fsc.ComputeLayout(sp, is_static, shorty, shorty_len, *sp, &sirt_, &sirt_expected_refs_,
+    fsc.ComputeLayout(sp, is_static, shorty, shorty_len, *sp, &handle_scope_, &handle_scope_expected_refs_,
                       &cur_stack_arg_, &cur_gpr_reg_, &cur_fpr_reg_, &code_return_,
                       &alloca_used_size_);
-    sirt_number_of_references_ = 0;
-    cur_sirt_entry_ = reinterpret_cast<StackReference<mirror::Object>*>(GetFirstSirtEntry());
+    handle_scope_number_of_references_ = 0;
+    cur_hs_entry_ = GetFirstHandleScopeEntry();
 
     // jni environment is always first argument
     sm_.AdvancePointer(self->GetJniEnv());
 
     if (is_static) {
-      sm_.AdvanceSirt((**sp)->GetDeclaringClass());
+      sm_.AdvanceHandleScope((**sp)->GetDeclaringClass());
     }
   }
 
   void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
 
-  void FinalizeSirt(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FinalizeHandleScope(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  jobject GetFirstSirtEntry() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return reinterpret_cast<jobject>(sirt_->GetStackReference(0));
+  StackReference<mirror::Object>* GetFirstHandleScopeEntry()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return handle_scope_->GetHandle(0).GetReference();
+  }
+
+  jobject GetFirstHandleScopeJObject()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return handle_scope_->GetHandle(0).ToJObject();
   }
 
   void PushGpr(uintptr_t val) {
@@ -1342,17 +1359,17 @@
     cur_stack_arg_++;
   }
 
-  uintptr_t PushSirt(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  uintptr_t PushHandle(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uintptr_t tmp;
     if (ref == nullptr) {
-      *cur_sirt_entry_ = StackReference<mirror::Object>();
+      *cur_hs_entry_ = StackReference<mirror::Object>();
       tmp = reinterpret_cast<uintptr_t>(nullptr);
     } else {
-      *cur_sirt_entry_ = StackReference<mirror::Object>::FromMirrorPtr(ref);
-      tmp = reinterpret_cast<uintptr_t>(cur_sirt_entry_);
+      *cur_hs_entry_ = StackReference<mirror::Object>::FromMirrorPtr(ref);
+      tmp = reinterpret_cast<uintptr_t>(cur_hs_entry_);
     }
-    cur_sirt_entry_++;
-    sirt_number_of_references_++;
+    cur_hs_entry_++;
+    handle_scope_number_of_references_++;
     return tmp;
   }
 
@@ -1366,14 +1383,14 @@
   }
 
  private:
-  uint32_t sirt_number_of_references_;
-  StackReference<mirror::Object>* cur_sirt_entry_;
-  StackIndirectReferenceTable* sirt_;
-  uint32_t sirt_expected_refs_;
+  uint32_t handle_scope_number_of_references_;
+  StackReference<mirror::Object>* cur_hs_entry_;
+  HandleScope* handle_scope_;
+  uint32_t handle_scope_expected_refs_;
   uintptr_t* cur_gpr_reg_;
   uint32_t* cur_fpr_reg_;
   uintptr_t* cur_stack_arg_;
-  // StackReference<mirror::Object>* top_of_sirt_;
+  // StackReference<mirror::Object>* top_of_handle_scope_;
   void* code_return_;
   size_t alloca_used_size_;
 
@@ -1409,7 +1426,7 @@
     case Primitive::kPrimNot: {
       StackReference<mirror::Object>* stack_ref =
           reinterpret_cast<StackReference<mirror::Object>*>(GetParamAddress());
-      sm_.AdvanceSirt(stack_ref->AsMirrorPtr());
+      sm_.AdvanceHandleScope(stack_ref->AsMirrorPtr());
       break;
     }
     case Primitive::kPrimFloat:
@@ -1428,17 +1445,17 @@
   }
 }
 
-void BuildGenericJniFrameVisitor::FinalizeSirt(Thread* self) {
+void BuildGenericJniFrameVisitor::FinalizeHandleScope(Thread* self) {
   // Initialize padding entries.
-  while (sirt_number_of_references_ < sirt_expected_refs_) {
-    *cur_sirt_entry_ = StackReference<mirror::Object>();
-    cur_sirt_entry_++;
-    sirt_number_of_references_++;
+  while (handle_scope_number_of_references_ < handle_scope_expected_refs_) {
+    *cur_hs_entry_ = StackReference<mirror::Object>();
+    cur_hs_entry_++;
+    handle_scope_number_of_references_++;
   }
-  sirt_->SetNumberOfReferences(sirt_expected_refs_);
-  DCHECK_NE(sirt_expected_refs_, 0U);
-  // Install Sirt.
-  self->PushSirt(sirt_);
+  handle_scope_->SetNumberOfReferences(handle_scope_expected_refs_);
+  DCHECK_NE(handle_scope_expected_refs_, 0U);
+  // Install HandleScope.
+  self->PushHandleScope(handle_scope_);
 }
 
 extern "C" void* artFindNativeMethod();
@@ -1461,11 +1478,11 @@
 
 /*
  * Initializes an alloca region assumed to be directly below sp for a native call:
- * Create a Sirt and call stack and fill a mini stack with values to be pushed to registers.
+ * Create a HandleScope and call stack and fill a mini stack with values to be pushed to registers.
  * The final element on the stack is a pointer to the native code.
  *
  * On entry, the stack has a standard callee-save frame above sp, and an alloca below it.
- * We need to fix this, as the Sirt needs to go into the callee-save frame.
+ * We need to fix this, as the handle scope needs to go into the callee-save frame.
  *
  * The return of this function denotes:
  * 1) How many bytes of the alloca can be released, if the value is non-negative.
@@ -1482,7 +1499,7 @@
   BuildGenericJniFrameVisitor visitor(&sp, called->IsStatic(), mh.GetShorty(), mh.GetShortyLength(),
                                       self);
   visitor.VisitArguments();
-  visitor.FinalizeSirt(self);
+  visitor.FinalizeHandleScope(self);
 
   // fix up managed-stack things in Thread
   self->SetTopOfStack(sp, 0);
@@ -1492,9 +1509,9 @@
   // Start JNI, save the cookie.
   uint32_t cookie;
   if (called->IsSynchronized()) {
-    cookie = JniMethodStartSynchronized(visitor.GetFirstSirtEntry(), self);
+    cookie = JniMethodStartSynchronized(visitor.GetFirstHandleScopeJObject(), self);
     if (self->IsExceptionPending()) {
-      self->PopSirt();
+      self->PopHandleScope();
       // A negative value denotes an error.
       return -1;
     }
@@ -1520,7 +1537,7 @@
       DCHECK(self->IsExceptionPending());    // There should be an exception pending now.
 
       // End JNI, as the assembly will move to deliver the exception.
-      jobject lock = called->IsSynchronized() ? visitor.GetFirstSirtEntry() : nullptr;
+      jobject lock = called->IsSynchronized() ? visitor.GetFirstHandleScopeJObject() : nullptr;
       if (mh.GetShorty()[0] == 'L') {
         artQuickGenericJniEndJNIRef(self, cookie, nullptr, lock);
       } else {
@@ -1542,7 +1559,7 @@
 }
 
 /*
- * Is called after the native JNI code. Responsible for cleanup (SIRT, saved state) and
+ * Is called after the native JNI code. Responsible for cleanup (handle scope, saved state) and
  * unlocking.
  */
 extern "C" uint64_t artQuickGenericJniEndTrampoline(Thread* self, mirror::ArtMethod** sp,
@@ -1554,10 +1571,9 @@
 
   jobject lock = nullptr;
   if (called->IsSynchronized()) {
-    StackIndirectReferenceTable* table =
-        reinterpret_cast<StackIndirectReferenceTable*>(
-            reinterpret_cast<uint8_t*>(sp) + kPointerSize);
-    lock = reinterpret_cast<jobject>(table->GetStackReference(0));
+    HandleScope* table = reinterpret_cast<HandleScope*>(
+        reinterpret_cast<uint8_t*>(sp) + kPointerSize);
+    lock = table->GetHandle(0).ToJObject();
   }
 
   MethodHelper mh(called);
@@ -1593,15 +1609,72 @@
   }
 }
 
-template<InvokeType type, bool access_check>
-static uint64_t artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
-                                mirror::ArtMethod* caller_method,
-                                Thread* self, mirror::ArtMethod** sp);
+// The following definitions create return types for two word-sized entities that will be passed
+// in registers so that memory operations for the interface trampolines can be avoided. The entities
+// are the resolved method and the pointer to the code to be invoked.
+//
+// On x86, ARM32 and MIPS, this is given as a *scalar* 64-bit value. The definition thus *must* be
+// uint64_t or long long int. We use the upper 32 bits for code and the lower 32 bits for the method.
+//
+// On x86_64 and ARM64, structs are decomposed for register allocation, so we can use a struct of
+// two size_t-sized values.
+//
+// We need two operations:
+//
+// 1) A flag value that signals failure. The assembly stubs expect the method part to be "0".
+//    GetFailureValue() will return a value that has method == 0.
+//
+// 2) A value that combines a code pointer and a method pointer.
+//    GetSuccessValue() constructs this.
+
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+typedef uint64_t MethodAndCode;
+
+// Encodes method_ptr==nullptr and code_ptr==nullptr
+static constexpr MethodAndCode GetFailureValue() {
+  return 0;
+}
+
+// Use the lower 32b for the method pointer and the upper 32b for the code pointer.
+static MethodAndCode GetSuccessValue(const void* code, mirror::ArtMethod* method) {
+  uint32_t method_uint = reinterpret_cast<uint32_t>(method);
+  uint64_t code_uint = reinterpret_cast<uint32_t>(code);
+  return ((code_uint << 32) | method_uint);
+}
+
+#elif defined(__x86_64__) || defined(__aarch64__)
+struct MethodAndCode {
+  uintptr_t method;
+  uintptr_t code;
+};
+
+// Encodes method_ptr==nullptr. Leaves random value in code pointer.
+static MethodAndCode GetFailureValue() {
+  MethodAndCode ret;
+  ret.method = 0;
+  return ret;
+}
+
+// Write values into their respective members.
+static MethodAndCode GetSuccessValue(const void* code, mirror::ArtMethod* method) {
+  MethodAndCode ret;
+  ret.method = reinterpret_cast<uintptr_t>(method);
+  ret.code = reinterpret_cast<uintptr_t>(code);
+  return ret;
+}
+#else
+#error "Unsupported architecture"
+#endif
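A small, self-contained check (values invented) of the 32-bit packing convention defined above: the method pointer occupies the low 32 bits, the code pointer the high 32 bits, and method == 0 serves as the failure flag tested by the assembly stubs:

#include <cassert>
#include <cstdint>

static uint64_t Pack(uint32_t code, uint32_t method) {
  return (static_cast<uint64_t>(code) << 32) | method;
}

int main() {
  const uint32_t method = 0x1000u;                      // pretend ArtMethod* on a 32-bit target
  const uint32_t code = 0x2000u;                        // pretend quick entry point
  const uint64_t packed = Pack(code, method);
  assert(static_cast<uint32_t>(packed) == method);      // low half: method
  assert(static_cast<uint32_t>(packed >> 32) == code);  // high half: code
  assert(static_cast<uint32_t>(Pack(0, 0)) == 0);       // failure flag: method == 0
  return 0;
}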
 
 template<InvokeType type, bool access_check>
-static uint64_t artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
-                                mirror::ArtMethod* caller_method,
-                                Thread* self, mirror::ArtMethod** sp) {
+static MethodAndCode artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
+                                     mirror::ArtMethod* caller_method,
+                                     Thread* self, mirror::ArtMethod** sp);
+
+template<InvokeType type, bool access_check>
+static MethodAndCode artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
+                                     mirror::ArtMethod* caller_method,
+                                     Thread* self, mirror::ArtMethod** sp) {
   mirror::ArtMethod* method = FindMethodFast(method_idx, this_object, caller_method, access_check,
                                              type);
   if (UNLIKELY(method == nullptr)) {
@@ -1615,13 +1688,14 @@
       ScopedObjectAccessUnchecked soa(self->GetJniEnv());
       RememberForGcArgumentVisitor visitor(sp, type == kStatic, shorty, shorty_len, &soa);
       visitor.VisitArguments();
-      method = FindMethodFromCode<type, access_check>(method_idx, this_object, caller_method, self);
+      method = FindMethodFromCode<type, access_check>(method_idx, &this_object, &caller_method,
+                                                      self);
       visitor.FixupReferences();
     }
 
     if (UNLIKELY(method == NULL)) {
       CHECK(self->IsExceptionPending());
-      return 0;  // failure
+      return GetFailureValue();  // Failure.
     }
   }
   DCHECK(!self->IsExceptionPending());
@@ -1630,24 +1704,17 @@
   // When we return, the caller will branch to this address, so it had better not be 0!
   DCHECK(code != nullptr) << "Code was NULL in method: " << PrettyMethod(method) << " location: "
       << MethodHelper(method).GetDexFile().GetLocation();
-#ifdef __LP64__
-  UNIMPLEMENTED(FATAL);
-  return 0;
-#else
-  uint32_t method_uint = reinterpret_cast<uint32_t>(method);
-  uint64_t code_uint = reinterpret_cast<uint32_t>(code);
-  uint64_t result = ((code_uint << 32) | method_uint);
-  return result;
-#endif
+
+  return GetSuccessValue(code, method);
 }
 
 // Explicit artInvokeCommon template function declarations to please analysis tool.
 #define EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(type, access_check)                                \
   template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                          \
-  uint64_t artInvokeCommon<type, access_check>(uint32_t method_idx,                             \
-                                               mirror::Object* this_object,                     \
-                                               mirror::ArtMethod* caller_method,                \
-                                               Thread* self, mirror::ArtMethod** sp)            \
+  MethodAndCode artInvokeCommon<type, access_check>(uint32_t method_idx,                        \
+                                                    mirror::Object* this_object,                \
+                                                    mirror::ArtMethod* caller_method,           \
+                                                    Thread* self, mirror::ArtMethod** sp)       \
 
 EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kVirtual, false);
 EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kVirtual, true);
@@ -1663,57 +1730,57 @@
 
 
 // See comments in runtime_support_asm.S
-extern "C" uint64_t artInvokeInterfaceTrampolineWithAccessCheck(uint32_t method_idx,
-                                                                mirror::Object* this_object,
-                                                                mirror::ArtMethod* caller_method,
-                                                                Thread* self,
-                                                                mirror::ArtMethod** sp)
+extern "C" MethodAndCode artInvokeInterfaceTrampolineWithAccessCheck(uint32_t method_idx,
+                                                                     mirror::Object* this_object,
+                                                                     mirror::ArtMethod* caller_method,
+                                                                     Thread* self,
+                                                                     mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return artInvokeCommon<kInterface, true>(method_idx, this_object, caller_method, self, sp);
 }
 
 
-extern "C" uint64_t artInvokeDirectTrampolineWithAccessCheck(uint32_t method_idx,
-                                                             mirror::Object* this_object,
-                                                             mirror::ArtMethod* caller_method,
-                                                             Thread* self,
-                                                             mirror::ArtMethod** sp)
+extern "C" MethodAndCode artInvokeDirectTrampolineWithAccessCheck(uint32_t method_idx,
+                                                                  mirror::Object* this_object,
+                                                                  mirror::ArtMethod* caller_method,
+                                                                  Thread* self,
+                                                                  mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return artInvokeCommon<kDirect, true>(method_idx, this_object, caller_method, self, sp);
 }
 
-extern "C" uint64_t artInvokeStaticTrampolineWithAccessCheck(uint32_t method_idx,
-                                                             mirror::Object* this_object,
-                                                             mirror::ArtMethod* caller_method,
-                                                             Thread* self,
-                                                             mirror::ArtMethod** sp)
+extern "C" MethodAndCode artInvokeStaticTrampolineWithAccessCheck(uint32_t method_idx,
+                                                                  mirror::Object* this_object,
+                                                                  mirror::ArtMethod* caller_method,
+                                                                  Thread* self,
+                                                                  mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return artInvokeCommon<kStatic, true>(method_idx, this_object, caller_method, self, sp);
 }
 
-extern "C" uint64_t artInvokeSuperTrampolineWithAccessCheck(uint32_t method_idx,
-                                                            mirror::Object* this_object,
-                                                            mirror::ArtMethod* caller_method,
-                                                            Thread* self,
-                                                            mirror::ArtMethod** sp)
+extern "C" MethodAndCode artInvokeSuperTrampolineWithAccessCheck(uint32_t method_idx,
+                                                                 mirror::Object* this_object,
+                                                                 mirror::ArtMethod* caller_method,
+                                                                 Thread* self,
+                                                                 mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return artInvokeCommon<kSuper, true>(method_idx, this_object, caller_method, self, sp);
 }
 
-extern "C" uint64_t artInvokeVirtualTrampolineWithAccessCheck(uint32_t method_idx,
-                                                              mirror::Object* this_object,
-                                                              mirror::ArtMethod* caller_method,
-                                                              Thread* self,
-                                                              mirror::ArtMethod** sp)
+extern "C" MethodAndCode artInvokeVirtualTrampolineWithAccessCheck(uint32_t method_idx,
+                                                                   mirror::Object* this_object,
+                                                                   mirror::ArtMethod* caller_method,
+                                                                   Thread* self,
+                                                                   mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return artInvokeCommon<kVirtual, true>(method_idx, this_object, caller_method, self, sp);
 }
 
 // Determine target of interface dispatch. This object is known non-null.
-extern "C" uint64_t artInvokeInterfaceTrampoline(mirror::ArtMethod* interface_method,
-                                                 mirror::Object* this_object,
-                                                 mirror::ArtMethod* caller_method,
-                                                 Thread* self, mirror::ArtMethod** sp)
+extern "C" MethodAndCode artInvokeInterfaceTrampoline(mirror::ArtMethod* interface_method,
+                                                      mirror::Object* this_object,
+                                                      mirror::ArtMethod* caller_method,
+                                                      Thread* self, mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtMethod* method;
   if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) {
@@ -1722,7 +1789,7 @@
       FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
       ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(interface_method, this_object,
                                                                  caller_method);
-      return 0;  // Failure.
+      return GetFailureValue();  // Failure.
     }
   } else {
     FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
@@ -1812,14 +1879,14 @@
       ScopedObjectAccessUnchecked soa(self->GetJniEnv());
       RememberForGcArgumentVisitor visitor(sp, false, shorty, shorty_len, &soa);
       visitor.VisitArguments();
-      method = FindMethodFromCode<kInterface, false>(dex_method_idx, this_object, caller_method,
+      method = FindMethodFromCode<kInterface, false>(dex_method_idx, &this_object, &caller_method,
                                                      self);
       visitor.FixupReferences();
     }
 
     if (UNLIKELY(method == nullptr)) {
       CHECK(self->IsExceptionPending());
-      return 0;  // Failure.
+      return GetFailureValue();  // Failure.
     }
   }
   const void* code = method->GetEntryPointFromQuickCompiledCode();
@@ -1827,15 +1894,8 @@
   // When we return, the caller will branch to this address, so it had better not be 0!
   DCHECK(code != nullptr) << "Code was NULL in method: " << PrettyMethod(method) << " location: "
       << MethodHelper(method).GetDexFile().GetLocation();
-#ifdef __LP64__
-  UNIMPLEMENTED(FATAL);
-  return 0;
-#else
-  uint32_t method_uint = reinterpret_cast<uint32_t>(method);
-  uint64_t code_uint = reinterpret_cast<uint32_t>(code);
-  uint64_t result = ((code_uint << 32) | method_uint);
-  return result;
-#endif
+
+  return GetSuccessValue(code, method);
 }
 
 }  // namespace art
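The deleted branch packed the two return values into a single uint64_t as (code << 32) | method, which only fits 32-bit pointers and had to bail out with UNIMPLEMENTED(FATAL) under __LP64__. Returning a small two-field struct sidesteps the packing entirely. The real MethodAndCode, GetSuccessValue() and GetFailureValue() are defined elsewhere in this change and are not shown here; the following is only a rough standalone sketch of the idea, with hypothetical *Sketch names.

// Standalone sketch of the two-register return idea; the real MethodAndCode,
// GetSuccessValue() and GetFailureValue() live elsewhere in this patch.
#include <cstdint>
#include <cstdio>

struct MethodAndCodeSketch {
  void* method;      // would be mirror::ArtMethod* in ART
  const void* code;  // entry point the assembly stub branches to
};

static MethodAndCodeSketch SuccessValue(const void* code, void* method) {
  MethodAndCodeSketch result;
  result.method = method;
  result.code = code;
  return result;  // small aggregate, typically returned in a register pair
}

static MethodAndCodeSketch FailureValue() {
  return MethodAndCodeSketch{nullptr, nullptr};  // 0/0 signals "exception pending"
}

int main() {
  int dummy_method = 0, dummy_code = 0;
  MethodAndCodeSketch ok = SuccessValue(&dummy_code, &dummy_method);
  MethodAndCodeSketch fail = FailureValue();
  std::printf("ok: method=%p code=%p, fail: method=%p\n",
              ok.method, const_cast<void*>(ok.code), fail.method);
  return 0;
}

On common calling conventions a struct of this shape comes back in two registers on both 32- and 64-bit targets, which is presumably what the assembly stubs consuming the value rely on.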
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index 97a8367..751cdb6 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <memory>
+
 #include "class_linker.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
@@ -25,9 +27,8 @@
 #include "mirror/stack_trace_element.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 #include "thread.h"
-#include "UniquePtr.h"
 #include "vmap_table.h"
 
 namespace art {
@@ -38,13 +39,14 @@
     CommonRuntimeTest::SetUp();
 
     ScopedObjectAccess soa(Thread::Current());
-    SirtRef<mirror::ClassLoader> class_loader(
-        soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("ExceptionHandle")));
+    StackHandleScope<2> hs(soa.Self());
+    Handle<mirror::ClassLoader> class_loader(
+        hs.NewHandle(soa.Decode<mirror::ClassLoader*>(LoadDex("ExceptionHandle"))));
     my_klass_ = class_linker_->FindClass(soa.Self(), "LExceptionHandle;", class_loader);
     ASSERT_TRUE(my_klass_ != NULL);
-    SirtRef<mirror::Class> sirt_klass(soa.Self(), my_klass_);
-    class_linker_->EnsureInitialized(sirt_klass, true, true);
-    my_klass_ = sirt_klass.get();
+    Handle<mirror::Class> klass(hs.NewHandle(my_klass_));
+    class_linker_->EnsureInitialized(klass, true, true);
+    my_klass_ = klass.Get();
 
     dex_ = my_klass_->GetDexCache()->GetDexFile();
 
@@ -72,9 +74,10 @@
 
     const std::vector<uint8_t>& fake_vmap_table_data = fake_vmap_table_data_.GetData();
     const std::vector<uint8_t>& fake_mapping_data = fake_mapping_data_.GetData();
-    uint32_t vmap_table_offset = sizeof(OatMethodHeader) + fake_vmap_table_data.size();
+    uint32_t vmap_table_offset = sizeof(OatQuickMethodHeader) + fake_vmap_table_data.size();
     uint32_t mapping_table_offset = vmap_table_offset + fake_mapping_data.size();
-    OatMethodHeader method_header(vmap_table_offset, mapping_table_offset, code_size);
+    OatQuickMethodHeader method_header(mapping_table_offset, vmap_table_offset,
+                                       4 * kPointerSize, 0u, 0u, code_size);
     fake_header_code_and_maps_.resize(sizeof(method_header));
     memcpy(&fake_header_code_and_maps_[0], &method_header, sizeof(method_header));
     fake_header_code_and_maps_.insert(fake_header_code_and_maps_.begin(),
@@ -91,13 +94,11 @@
 
     method_f_ = my_klass_->FindVirtualMethod("f", "()I");
     ASSERT_TRUE(method_f_ != NULL);
-    method_f_->SetFrameSizeInBytes(4 * kPointerSize);
     method_f_->SetEntryPointFromQuickCompiledCode(code_ptr);
     method_f_->SetNativeGcMap(&fake_gc_map_[0]);
 
     method_g_ = my_klass_->FindVirtualMethod("g", "(I)V");
     ASSERT_TRUE(method_g_ != NULL);
-    method_g_->SetFrameSizeInBytes(4 * kPointerSize);
     method_g_->SetEntryPointFromQuickCompiledCode(code_ptr);
     method_g_->SetNativeGcMap(&fake_gc_map_[0]);
   }
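The test now reserves a StackHandleScope<2> once and hands out Handles from it, rather than creating one SirtRef per reference. The real StackHandleScope/Handle are GC-visible roots tied to the current Thread; the sketch below (hypothetical *Sketch names) only illustrates the fixed-capacity, slot-handing shape of the API.

// Minimal sketch of a fixed-capacity handle scope; purely illustrative,
// the real StackHandleScope/Handle are registered with the thread and the GC.
#include <cassert>
#include <cstddef>

template <typename T>
class HandleSketch {
 public:
  explicit HandleSketch(void** slot) : slot_(slot) {}
  T* Get() const { return static_cast<T*>(*slot_); }
  void Assign(T* value) { *slot_ = value; }
 private:
  void** slot_;  // points into the scope's slot array, which a moving GC could rewrite
};

template <std::size_t kCapacity>
class StackHandleScopeSketch {
 public:
  StackHandleScopeSketch() : used_(0) {}
  template <typename T>
  HandleSketch<T> NewHandle(T* value) {
    assert(used_ < kCapacity);          // capacity is fixed at construction time
    slots_[used_] = value;
    return HandleSketch<T>(&slots_[used_++]);
  }
 private:
  void* slots_[kCapacity];
  std::size_t used_;
};

int main() {
  int a = 1, b = 2;
  StackHandleScopeSketch<2> hs;
  HandleSketch<int> ha = hs.NewHandle(&a);
  HandleSketch<int> hb = hs.NewHandle(&b);
  assert(*ha.Get() == 1 && *hb.Get() == 2);
  return 0;
}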
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index b8093bc..8d750c5 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -60,11 +60,15 @@
 }
 
 void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
-  LOG(DEBUG) << "Handling fault";
+  // BE CAREFUL ALLOCATING HERE INCLUDING USING LOG(...)
+  //
+  // If malloc calls abort, it will be holding its lock.
+  // If the handler tries to call malloc, it will deadlock.
+  VLOG(signals) << "Handling fault";
   if (IsInGeneratedCode(context, true)) {
-    LOG(DEBUG) << "in generated code, looking for handler";
+    VLOG(signals) << "in generated code, looking for handler";
     for (const auto& handler : generated_code_handlers_) {
-      LOG(DEBUG) << "invoking Action on handler " << handler;
+      VLOG(signals) << "invoking Action on handler " << handler;
       if (handler->Action(sig, info, context)) {
         return;
       }
@@ -75,7 +79,7 @@
       return;
     }
   }
-  LOG(ERROR)<< "Caught unknown SIGSEGV in ART fault handler";
+  VLOG(signals) << "Caught unknown SIGSEGV in ART fault handler";
   oldaction_.sa_sigaction(sig, info, context);
 }
 
@@ -106,23 +110,23 @@
 bool FaultManager::IsInGeneratedCode(void* context, bool check_dex_pc) {
   // We can only be running Java code in the current thread if it
   // is in Runnable state.
-  LOG(DEBUG) << "Checking for generated code";
+  VLOG(signals) << "Checking for generated code";
   Thread* thread = Thread::Current();
   if (thread == nullptr) {
-    LOG(DEBUG) << "no current thread";
+    VLOG(signals) << "no current thread";
     return false;
   }
 
   ThreadState state = thread->GetState();
   if (state != kRunnable) {
-    LOG(DEBUG) << "not runnable";
+    VLOG(signals) << "not runnable";
     return false;
   }
 
   // Current thread is runnable.
   // Make sure it has the mutator lock.
   if (!Locks::mutator_lock_->IsSharedHeld(thread)) {
-    LOG(DEBUG) << "no lock";
+    VLOG(signals) << "no lock";
     return false;
   }
 
@@ -135,9 +139,9 @@
   GetMethodAndReturnPCAndSP(context, &method_obj, &return_pc, &sp);
 
   // If we don't have a potential method, we're outta here.
-  LOG(DEBUG) << "potential method: " << method_obj;
+  VLOG(signals) << "potential method: " << method_obj;
   if (method_obj == 0 || !IsAligned<kObjectAlignment>(method_obj)) {
-    LOG(DEBUG) << "no method";
+    VLOG(signals) << "no method";
     return false;
   }
 
@@ -147,36 +151,36 @@
   // TODO: Method might be not a heap address, and GetClass could fault.
   mirror::Class* cls = method_obj->GetClass<kVerifyNone>();
   if (cls == nullptr) {
-    LOG(DEBUG) << "not a class";
+    VLOG(signals) << "not a class";
     return false;
   }
   if (!IsAligned<kObjectAlignment>(cls)) {
-    LOG(DEBUG) << "not aligned";
+    VLOG(signals) << "not aligned";
     return false;
   }
 
 
   if (!VerifyClassClass(cls)) {
-    LOG(DEBUG) << "not a class class";
+    VLOG(signals) << "not a class class";
     return false;
   }
 
   // Now make sure the class is a mirror::ArtMethod.
   if (!cls->IsArtMethodClass()) {
-    LOG(DEBUG) << "not a method";
+    VLOG(signals) << "not a method";
     return false;
   }
 
   // We can be certain that this is a method now.  Check if we have a GC map
   // at the return PC address.
   if (true || kIsDebugBuild) {
-    LOG(DEBUG) << "looking for dex pc for return pc " << std::hex << return_pc;
+    VLOG(signals) << "looking for dex pc for return pc " << std::hex << return_pc;
     const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(method_obj);
     uint32_t sought_offset = return_pc - reinterpret_cast<uintptr_t>(code);
-    LOG(DEBUG) << "pc offset: " << std::hex << sought_offset;
+    VLOG(signals) << "pc offset: " << std::hex << sought_offset;
   }
   uint32_t dexpc = method_obj->ToDexPc(return_pc, false);
-  LOG(DEBUG) << "dexpc: " << dexpc;
+  VLOG(signals) << "dexpc: " << dexpc;
   return !check_dex_pc || dexpc != DexFile::kDexNoIndex;
 }
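The switch from LOG(DEBUG)/LOG(ERROR) to VLOG(signals) matters because, as the new comment warns, the fault handler can run while malloc's lock is held, and constructing a log message can allocate and deadlock. VLOG-style logging is gated on a runtime verbosity flag, so with signals verbosity off the stream expression is never evaluated and nothing is formatted or allocated. A rough sketch of that gating pattern (the actual ART macro differs in detail):

// Sketch of verbosity-gated logging: when the flag is off, the stream
// expression after the macro is never evaluated, so nothing allocates.
#include <iostream>

struct LogVerbositySketch {
  bool signals = false;  // off by default, flipped by a -verbose:signals style option
};
LogVerbositySketch g_log_verbosity;

// The dangling-else trick keeps the macro usable as a single statement.
#define VLOG_SKETCH(module) \
  if (!g_log_verbosity.module) {} else std::cerr

void HandleFaultSketch() {
  VLOG_SKETCH(signals) << "Handling fault\n";  // no-op unless signals verbosity is on
}

int main() {
  HandleFaultSketch();              // prints nothing
  g_log_verbosity.signals = true;
  HandleFaultSketch();              // now prints
  return 0;
}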
 
diff --git a/runtime/fault_handler.h b/runtime/fault_handler.h
index ea2f7c8..97d3c2f 100644
--- a/runtime/fault_handler.h
+++ b/runtime/fault_handler.h
@@ -112,7 +112,7 @@
 };
 
 
-// Statically allocated so the the signal handler can get access to it.
+// Statically allocated so the signal handler can get access to it.
 extern FaultManager fault_manager;
 
 }       // namespace art
diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h
index c79b586..979970c 100644
--- a/runtime/gc/accounting/atomic_stack.h
+++ b/runtime/gc/accounting/atomic_stack.h
@@ -18,12 +18,12 @@
 #define ART_RUNTIME_GC_ACCOUNTING_ATOMIC_STACK_H_
 
 #include <algorithm>
+#include <memory>
 #include <string>
 
 #include "atomic.h"
 #include "base/logging.h"
 #include "base/macros.h"
-#include "UniquePtr.h"
 #include "mem_map.h"
 #include "utils.h"
 
@@ -36,7 +36,7 @@
  public:
   // Capacity is how many elements we can store in the stack.
   static AtomicStack* Create(const std::string& name, size_t capacity) {
-    UniquePtr<AtomicStack> mark_stack(new AtomicStack(name, capacity));
+    std::unique_ptr<AtomicStack> mark_stack(new AtomicStack(name, capacity));
     mark_stack->Init();
     return mark_stack.release();
   }
@@ -46,8 +46,8 @@
   void Reset() {
     DCHECK(mem_map_.get() != NULL);
     DCHECK(begin_ != NULL);
-    front_index_ = 0;
-    back_index_ = 0;
+    front_index_.StoreRelaxed(0);
+    back_index_.StoreRelaxed(0);
     debug_is_sorted_ = true;
     int result = madvise(begin_, sizeof(T) * capacity_, MADV_DONTNEED);
     if (result == -1) {
@@ -64,12 +64,12 @@
     }
     int32_t index;
     do {
-      index = back_index_;
+      index = back_index_.LoadRelaxed();
       if (UNLIKELY(static_cast<size_t>(index) >= capacity_)) {
         // Stack overflow.
         return false;
       }
-    } while (!back_index_.CompareAndSwap(index, index + 1));
+    } while (!back_index_.CompareExchangeWeakRelaxed(index, index + 1));
     begin_[index] = value;
     return true;
   }
@@ -83,13 +83,13 @@
     int32_t index;
     int32_t new_index;
     do {
-      index = back_index_;
+      index = back_index_.LoadRelaxed();
       new_index = index + num_slots;
       if (UNLIKELY(static_cast<size_t>(new_index) >= capacity_)) {
         // Stack overflow.
         return false;
       }
-    } while (!back_index_.CompareAndSwap(index, new_index));
+    } while (!back_index_.CompareExchangeWeakRelaxed(index, new_index));
     *start_address = &begin_[index];
     *end_address = &begin_[new_index];
     if (kIsDebugBuild) {
@@ -114,31 +114,31 @@
     if (kIsDebugBuild) {
       debug_is_sorted_ = false;
     }
-    int32_t index = back_index_;
+    int32_t index = back_index_.LoadRelaxed();
     DCHECK_LT(static_cast<size_t>(index), capacity_);
-    back_index_ = index + 1;
+    back_index_.StoreRelaxed(index + 1);
     begin_[index] = value;
   }
 
   T PopBack() {
-    DCHECK_GT(back_index_, front_index_);
+    DCHECK_GT(back_index_.LoadRelaxed(), front_index_.LoadRelaxed());
     // Decrement the back index non atomically.
-    back_index_ = back_index_ - 1;
-    return begin_[back_index_];
+    back_index_.StoreRelaxed(back_index_.LoadRelaxed() - 1);
+    return begin_[back_index_.LoadRelaxed()];
   }
 
   // Take an item from the front of the stack.
   T PopFront() {
-    int32_t index = front_index_;
-    DCHECK_LT(index, back_index_.Load());
-    front_index_ = front_index_ + 1;
+    int32_t index = front_index_.LoadRelaxed();
+    DCHECK_LT(index, back_index_.LoadRelaxed());
+    front_index_.StoreRelaxed(index + 1);
     return begin_[index];
   }
 
   // Pop a number of elements.
   void PopBackCount(int32_t n) {
     DCHECK_GE(Size(), static_cast<size_t>(n));
-    back_index_.FetchAndSub(n);
+    back_index_.FetchAndSubSequentiallyConsistent(n);
   }
 
   bool IsEmpty() const {
@@ -146,16 +146,16 @@
   }
 
   size_t Size() const {
-    DCHECK_LE(front_index_, back_index_);
-    return back_index_ - front_index_;
+    DCHECK_LE(front_index_.LoadRelaxed(), back_index_.LoadRelaxed());
+    return back_index_.LoadRelaxed() - front_index_.LoadRelaxed();
   }
 
   T* Begin() const {
-    return const_cast<T*>(begin_ + front_index_);
+    return const_cast<T*>(begin_ + front_index_.LoadRelaxed());
   }
 
   T* End() const {
-    return const_cast<T*>(begin_ + back_index_);
+    return const_cast<T*>(begin_ + back_index_.LoadRelaxed());
   }
 
   size_t Capacity() const {
@@ -169,11 +169,11 @@
   }
 
   void Sort() {
-    int32_t start_back_index = back_index_.Load();
-    int32_t start_front_index = front_index_.Load();
+    int32_t start_back_index = back_index_.LoadRelaxed();
+    int32_t start_front_index = front_index_.LoadRelaxed();
     std::sort(Begin(), End());
-    CHECK_EQ(start_back_index, back_index_.Load());
-    CHECK_EQ(start_front_index, front_index_.Load());
+    CHECK_EQ(start_back_index, back_index_.LoadRelaxed());
+    CHECK_EQ(start_front_index, front_index_.LoadRelaxed());
     if (kIsDebugBuild) {
       debug_is_sorted_ = true;
     }
@@ -215,7 +215,7 @@
   std::string name_;
 
   // Memory mapping of the atomic stack.
-  UniquePtr<MemMap> mem_map_;
+  std::unique_ptr<MemMap> mem_map_;
 
   // Back index (index after the last element pushed).
   AtomicInteger back_index_;
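The back_index_ and front_index_ accesses now spell out their memory ordering. The push path claims a slot with a relaxed weak compare-exchange loop; relaxed is sufficient here because consumers of the stack synchronize with the pushers by other means (for example at a GC pause) before reading the entries. A standalone sketch of the same index-claiming loop using std::atomic directly:

// Sketch of the AtomicStack push fast path with explicit relaxed ordering.
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <cstdio>

template <typename T, std::size_t kCapacity>
class BoundedAtomicStackSketch {
 public:
  // Mirrors AtomicStack::AtomicPushBack: claim an index, then write the slot.
  bool Push(const T& value) {
    int32_t index;
    do {
      index = back_index_.load(std::memory_order_relaxed);
      if (static_cast<std::size_t>(index) >= kCapacity) {
        return false;  // Stack overflow; the caller falls back to a slow path.
      }
      // A weak CAS may fail spuriously, which is harmless inside a retry loop.
    } while (!back_index_.compare_exchange_weak(index, index + 1,
                                                std::memory_order_relaxed));
    slots_[index] = value;
    return true;
  }

  std::size_t Size() const {
    return static_cast<std::size_t>(back_index_.load(std::memory_order_relaxed));
  }

 private:
  std::atomic<int32_t> back_index_{0};
  T slots_[kCapacity];
};

int main() {
  BoundedAtomicStackSketch<int, 4> stack;
  for (int i = 0; i < 5; ++i) {
    std::printf("push %d -> %s\n", i, stack.Push(i) ? "ok" : "full");
  }
  std::printf("size=%zu\n", stack.Size());
  return 0;
}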
diff --git a/runtime/gc/accounting/card_table.cc b/runtime/gc/accounting/card_table.cc
index 714e6f7..43a173e 100644
--- a/runtime/gc/accounting/card_table.cc
+++ b/runtime/gc/accounting/card_table.cc
@@ -55,7 +55,7 @@
   size_t capacity = heap_capacity / kCardSize;
   /* Allocate an extra 256 bytes to allow fixed low-byte of base */
   std::string error_msg;
-  UniquePtr<MemMap> mem_map(MemMap::MapAnonymous("card table", NULL,
+  std::unique_ptr<MemMap> mem_map(MemMap::MapAnonymous("card table", NULL,
                                                  capacity + 256, PROT_READ | PROT_WRITE,
                                                  false, &error_msg));
   CHECK(mem_map.get() != NULL) << "couldn't allocate card table: " << error_msg;
diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h
index 8d5dc07..7934974 100644
--- a/runtime/gc/accounting/card_table.h
+++ b/runtime/gc/accounting/card_table.h
@@ -17,10 +17,11 @@
 #ifndef ART_RUNTIME_GC_ACCOUNTING_CARD_TABLE_H_
 #define ART_RUNTIME_GC_ACCOUNTING_CARD_TABLE_H_
 
+#include <memory>
+
 #include "base/mutex.h"
 #include "globals.h"
 #include "mem_map.h"
-#include "UniquePtr.h"
 
 namespace art {
 
@@ -141,7 +142,7 @@
   void VerifyCardTable();
 
   // Mmapped pages for the card table
-  UniquePtr<MemMap> mem_map_;
+  std::unique_ptr<MemMap> mem_map_;
   // Value used to compute card table addresses from object addresses, see GetBiasedBegin
   byte* const biased_begin_;
   // Card table doesn't begin at the beginning of the mem_map_, instead it is displaced by offset
diff --git a/runtime/gc/accounting/gc_allocator.h b/runtime/gc/accounting/gc_allocator.h
index 7dd7cca..1d96112 100644
--- a/runtime/gc/accounting/gc_allocator.h
+++ b/runtime/gc/accounting/gc_allocator.h
@@ -73,7 +73,7 @@
 // GCAllocatorImpl<T> if kMeasureGCMemoryOverhead is true, std::allocator<T> otherwise.
 template <typename T>
 class GcAllocator : public TypeStaticIf<kMeasureGcMemoryOverhead, GcAllocatorImpl<T>,
-                                        std::allocator<T> >::type {
+                                        std::allocator<T>>::type {
 };
 
 }  // namespace accounting
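Dropping the space in std::allocator<T> > is a C++11 cleanup; before C++11 the consecutive closing brackets lexed as a right-shift token, whereas C++11 closes both template argument lists. A minimal illustration:

#include <set>
#include <vector>

int main() {
  // Pre-C++11 this had to be written "std::set<std::vector<int> >".
  std::set<std::vector<int>> nested;
  nested.insert({1, 2, 3});
  return 0;
}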
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 7cddaf4..228d1dc 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -16,6 +16,8 @@
 
 #include "mod_union_table.h"
 
+#include <memory>
+
 #include "base/stl_util.h"
 #include "card_table-inl.h"
 #include "heap_bitmap.h"
@@ -30,7 +32,6 @@
 #include "mirror/object_array-inl.h"
 #include "space_bitmap-inl.h"
 #include "thread.h"
-#include "UniquePtr.h"
 
 using ::art::mirror::Object;
 
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index 5ae7c77..449e171 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -50,7 +50,7 @@
 // cleared between GC phases, reducing the number of dirty cards that need to be scanned.
 class ModUnionTable {
  public:
-  typedef std::set<byte*, std::less<byte*>, GcAllocator<byte*> > CardSet;
+  typedef std::set<byte*, std::less<byte*>, GcAllocator<byte*>> CardSet;
 
   explicit ModUnionTable(const std::string& name, Heap* heap, space::ContinuousSpace* space)
       : name_(name),
@@ -126,7 +126,7 @@
 
   // Maps from dirty cards to their corresponding alloc space references.
   SafeMap<const byte*, std::vector<mirror::HeapReference<mirror::Object>*>, std::less<const byte*>,
-      GcAllocator<std::pair<const byte*, std::vector<mirror::HeapReference<mirror::Object>*> > > >
+      GcAllocator<std::pair<const byte*, std::vector<mirror::HeapReference<mirror::Object>*>>> >
       references_;
 };
 
diff --git a/runtime/gc/accounting/remembered_set.cc b/runtime/gc/accounting/remembered_set.cc
index bbbd1ed..3ff5874 100644
--- a/runtime/gc/accounting/remembered_set.cc
+++ b/runtime/gc/accounting/remembered_set.cc
@@ -16,6 +16,8 @@
 
 #include "remembered_set.h"
 
+#include <memory>
+
 #include "base/stl_util.h"
 #include "card_table-inl.h"
 #include "heap_bitmap.h"
@@ -30,7 +32,6 @@
 #include "mirror/object_array-inl.h"
 #include "space_bitmap-inl.h"
 #include "thread.h"
-#include "UniquePtr.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/accounting/remembered_set.h b/runtime/gc/accounting/remembered_set.h
index e3d8537..706cf35 100644
--- a/runtime/gc/accounting/remembered_set.h
+++ b/runtime/gc/accounting/remembered_set.h
@@ -43,7 +43,7 @@
 // from the free list spaces to the bump pointer spaces.
 class RememberedSet {
  public:
-  typedef std::set<byte*, std::less<byte*>, GcAllocator<byte*> > CardSet;
+  typedef std::set<byte*, std::less<byte*>, GcAllocator<byte*>> CardSet;
 
   explicit RememberedSet(const std::string& name, Heap* heap, space::ContinuousSpace* space)
       : name_(name), heap_(heap), space_(space) {}
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index 646fce6..7f1da79 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -19,6 +19,8 @@
 
 #include "space_bitmap.h"
 
+#include <memory>
+
 #include "base/logging.h"
 #include "dex_file-inl.h"
 #include "heap_bitmap.h"
@@ -28,7 +30,6 @@
 #include "mirror/object_array-inl.h"
 #include "object_utils.h"
 #include "space_bitmap-inl.h"
-#include "UniquePtr.h"
 #include "utils.h"
 
 namespace art {
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index 66f9a3a..8e817e5 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -51,7 +51,7 @@
   // Round up since heap_capacity is not necessarily a multiple of kAlignment * kBitsPerWord.
   const size_t bitmap_size = ComputeBitmapSize(heap_capacity);
   std::string error_msg;
-  UniquePtr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), nullptr, bitmap_size,
+  std::unique_ptr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), nullptr, bitmap_size,
                                                  PROT_READ | PROT_WRITE, false, &error_msg));
   if (UNLIKELY(mem_map.get() == nullptr)) {
     LOG(ERROR) << "Failed to allocate bitmap " << name << ": " << error_msg;
@@ -226,7 +226,7 @@
 
 template<size_t kAlignment>
 void SpaceBitmap<kAlignment>::InOrderWalk(ObjectCallback* callback, void* arg) {
-  UniquePtr<SpaceBitmap<kAlignment>> visited(
+  std::unique_ptr<SpaceBitmap<kAlignment>> visited(
       Create("bitmap for in-order walk", reinterpret_cast<byte*>(heap_begin_),
              IndexToOffset(bitmap_size_ / kWordSize)));
   CHECK(bitmap_begin_ != nullptr);
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index a805809..50d15c6 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -17,17 +17,17 @@
 #ifndef ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_H_
 #define ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_H_
 
+#include <limits.h>
+#include <stdint.h>
+#include <memory>
+#include <set>
+#include <vector>
+
 #include "base/mutex.h"
 #include "gc_allocator.h"
 #include "globals.h"
 #include "mem_map.h"
 #include "object_callbacks.h"
-#include "UniquePtr.h"
-
-#include <limits.h>
-#include <set>
-#include <stdint.h>
-#include <vector>
 
 namespace art {
 
@@ -217,7 +217,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Backing storage for bitmap.
-  UniquePtr<MemMap> mem_map_;
+  std::unique_ptr<MemMap> mem_map_;
 
   // This bitmap itself, word sized for efficiency in scanning.
   uword* const bitmap_begin_;
diff --git a/runtime/gc/accounting/space_bitmap_test.cc b/runtime/gc/accounting/space_bitmap_test.cc
index 972f94d..a30bb25 100644
--- a/runtime/gc/accounting/space_bitmap_test.cc
+++ b/runtime/gc/accounting/space_bitmap_test.cc
@@ -17,11 +17,11 @@
 #include "space_bitmap.h"
 
 #include <stdint.h>
+#include <memory>
 
 #include "common_runtime_test.h"
 #include "globals.h"
 #include "space_bitmap-inl.h"
-#include "UniquePtr.h"
 
 namespace art {
 namespace gc {
@@ -32,7 +32,7 @@
 TEST_F(SpaceBitmapTest, Init) {
   byte* heap_begin = reinterpret_cast<byte*>(0x10000000);
   size_t heap_capacity = 16 * MB;
-  UniquePtr<ContinuousSpaceBitmap> space_bitmap(
+  std::unique_ptr<ContinuousSpaceBitmap> space_bitmap(
       ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
   EXPECT_TRUE(space_bitmap.get() != NULL);
 }
@@ -60,7 +60,7 @@
   byte* heap_begin = reinterpret_cast<byte*>(0x10000000);
   size_t heap_capacity = 16 * MB;
 
-  UniquePtr<ContinuousSpaceBitmap> space_bitmap(
+  std::unique_ptr<ContinuousSpaceBitmap> space_bitmap(
       ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
   EXPECT_TRUE(space_bitmap.get() != NULL);
 
@@ -120,7 +120,7 @@
 
 
   for (int i = 0; i < 5 ; ++i) {
-    UniquePtr<ContinuousSpaceBitmap> space_bitmap(
+    std::unique_ptr<ContinuousSpaceBitmap> space_bitmap(
         ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
 
     for (int j = 0; j < 10000; ++j) {
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index f113030..10b88b3 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -584,7 +584,7 @@
   return AllocRun(self, idx);
 }
 
-void* RosAlloc::AllocFromCurrentRunUnlocked(Thread* self, size_t idx) {
+inline void* RosAlloc::AllocFromCurrentRunUnlocked(Thread* self, size_t idx) {
   Run* current_run = current_runs_[idx];
   DCHECK(current_run != nullptr);
   void* slot_addr = current_run->AllocSlot();
@@ -793,7 +793,7 @@
     // already in the non-full run set (i.e., it was full) insert it
     // into the non-full run set.
     if (run != current_runs_[idx]) {
-      hash_set<Run*, hash_run, eq_run>* full_runs =
+      std::unordered_set<Run*, hash_run, eq_run>* full_runs =
           kIsDebugBuild ? &full_runs_[idx] : NULL;
       std::set<Run*>::iterator pos = non_full_runs->find(run);
       if (pos == non_full_runs->end()) {
@@ -1156,11 +1156,11 @@
   WriterMutexLock wmu(self, bulk_free_lock_);
 
   // First mark slots to free in the bulk free bit map without locking the
-  // size bracket locks. On host, hash_set is faster than vector + flag.
+  // size bracket locks. On host, unordered_set is faster than vector + flag.
 #ifdef HAVE_ANDROID_OS
   std::vector<Run*> runs;
 #else
-  hash_set<Run*, hash_run, eq_run> runs;
+  std::unordered_set<Run*, hash_run, eq_run> runs;
 #endif
   for (size_t i = 0; i < num_ptrs; i++) {
     void* ptr = ptrs[i];
@@ -1267,7 +1267,7 @@
       // Check if the run should be moved to non_full_runs_ or
       // free_page_runs_.
       std::set<Run*>* non_full_runs = &non_full_runs_[idx];
-      hash_set<Run*, hash_run, eq_run>* full_runs =
+      std::unordered_set<Run*, hash_run, eq_run>* full_runs =
           kIsDebugBuild ? &full_runs_[idx] : NULL;
       if (run->IsAllFree()) {
         // It has just become completely free. Free the pages of the
@@ -1281,7 +1281,7 @@
           // If it was full, remove it from the full run set (debug
           // only.)
           if (kIsDebugBuild) {
-            hash_set<Run*, hash_run, eq_run>::iterator pos = full_runs->find(run);
+            std::unordered_set<Run*, hash_run, eq_run>::iterator pos = full_runs->find(run);
             DCHECK(pos != full_runs->end());
             full_runs->erase(pos);
             if (kTraceRosAlloc) {
@@ -2054,7 +2054,7 @@
       } else {
         // If it's full, it must in the full run set (debug build only.)
         if (kIsDebugBuild) {
-          hash_set<Run*, hash_run, eq_run>& full_runs = rosalloc->full_runs_[idx];
+          std::unordered_set<Run*, hash_run, eq_run>& full_runs = rosalloc->full_runs_[idx];
           CHECK(full_runs.find(this) != full_runs.end())
               << " A full run isn't in the full run set " << Dump();
         }
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 21044f3..9464331 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -17,43 +17,22 @@
 #ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
 #define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
 
-#include <set>
 #include <stdint.h>
 #include <stdlib.h>
-#include <string>
 #include <sys/mman.h>
+#include <memory>
+#include <set>
+#include <string>
+#include <unordered_set>
 #include <vector>
 
 #include "base/mutex.h"
 #include "base/logging.h"
 #include "globals.h"
 #include "mem_map.h"
-#include "UniquePtr.h"
 #include "utils.h"
 
-// A boilerplate to use hash_map/hash_set both on host and device.
-#ifdef HAVE_ANDROID_OS
-#include <hash_map>
-#include <hash_set>
-using std::hash_map;
-using std::hash_set;
-#else  // HAVE_ANDROID_OS
-#ifdef __DEPRECATED
-#define ROSALLOC_OLD__DEPRECATED __DEPRECATED
-#undef __DEPRECATED
-#endif
-#include <ext/hash_map>
-#include <ext/hash_set>
-#ifdef ROSALLOC_OLD__DEPRECATED
-#define __DEPRECATED ROSALLOC_OLD__DEPRECATED
-#undef ROSALLOC_OLD__DEPRECATED
-#endif
-using __gnu_cxx::hash_map;
-using __gnu_cxx::hash_set;
-#endif  // HAVE_ANDROID_OS
-
 namespace art {
-
 namespace gc {
 namespace allocator {
 
@@ -462,7 +441,7 @@
   std::set<Run*> non_full_runs_[kNumOfSizeBrackets];
   // The run sets that hold the runs whose slots are all full. This is
   // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
-  hash_set<Run*, hash_run, eq_run> full_runs_[kNumOfSizeBrackets];
+  std::unordered_set<Run*, hash_run, eq_run> full_runs_[kNumOfSizeBrackets];
   // The set of free pages.
   std::set<FreePageRun*> free_page_runs_ GUARDED_BY(lock_);
   // The dedicated full run, it is always full and shared by all threads when revoking happens.
@@ -490,7 +469,7 @@
   byte* page_map_;  // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
   size_t page_map_size_;
   size_t max_page_map_size_;
-  UniquePtr<MemMap> page_map_mem_map_;
+  std::unique_ptr<MemMap> page_map_mem_map_;
 
   // The table that indicates the size of free page runs. These sizes
   // are stored here to avoid storing in the free page header and
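The removed hash_map/hash_set boilerplate existed only to paper over the __gnu_cxx vs. std differences between host and device builds; C++11's std::unordered_set takes the same hash and equality functor parameters, so full_runs_ keeps its hash_run/eq_run functors unchanged. A standalone sketch of that container shape (RunSketch and the *_sketch functors are stand-ins for the real types):

// Sketch of the std::unordered_set<Run*, hash_run, eq_run> shape used for full_runs_.
#include <cstddef>
#include <cstdio>
#include <unordered_set>

struct RunSketch { int size_bracket_idx; };

struct hash_run_sketch {
  std::size_t operator()(const RunSketch* r) const {
    return reinterpret_cast<std::size_t>(r);  // identity hash on the pointer
  }
};

struct eq_run_sketch {
  bool operator()(const RunSketch* a, const RunSketch* b) const {
    return a == b;  // pointer identity
  }
};

int main() {
  std::unordered_set<RunSketch*, hash_run_sketch, eq_run_sketch> full_runs;
  RunSketch r1{3}, r2{5};
  full_runs.insert(&r1);
  full_runs.insert(&r2);
  std::printf("contains r1: %d, size: %zu\n",
              full_runs.count(&r1) != 0, full_runs.size());
  return 0;
}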
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index f9a6abe..16add0b 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -128,6 +128,18 @@
   return (static_cast<uint64_t>(freed_bytes_) * 1000) / (NsToMs(GetDurationNs()) + 1);
 }
 
+void GarbageCollector::RecordFree(uint64_t freed_objects, int64_t freed_bytes) {
+  freed_objects_ += freed_objects;
+  freed_bytes_ += freed_bytes;
+  GetHeap()->RecordFree(freed_objects, freed_bytes);
+}
+
+void GarbageCollector::RecordFreeLargeObjects(uint64_t freed_objects, int64_t freed_bytes) {
+  freed_large_objects_ += freed_objects;
+  freed_large_object_bytes_ += freed_bytes;
+  GetHeap()->RecordFree(freed_objects, freed_bytes);
+}
+
 void GarbageCollector::ResetMeasurements() {
   cumulative_timings_.Reset();
   pause_histogram_.Reset();
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index ca4a1d5..02dd4d9 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -88,19 +88,19 @@
   // this is the allocation space, for full GC then we swap the zygote bitmaps too.
   void SwapBitmaps() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  size_t GetFreedBytes() const {
+  int64_t GetFreedBytes() const {
     return freed_bytes_;
   }
 
-  size_t GetFreedLargeObjectBytes() const {
+  int64_t GetFreedLargeObjectBytes() const {
     return freed_large_object_bytes_;
   }
 
-  size_t GetFreedObjects() const {
+  uint64_t GetFreedObjects() const {
     return freed_objects_;
   }
 
-  size_t GetFreedLargeObjects() const {
+  uint64_t GetFreedLargeObjects() const {
     return freed_large_objects_;
   }
 
@@ -108,7 +108,7 @@
     return pause_histogram_.Sum();
   }
 
-  uint64_t GetTotalFreedBytes() const {
+  int64_t GetTotalFreedBytes() const {
     return total_freed_bytes_;
   }
 
@@ -141,6 +141,11 @@
   // Revoke all the thread-local buffers.
   virtual void RevokeAllThreadLocalBuffers() = 0;
 
+  // Record that you have freed some objects or large objects, calls Heap::RecordFree.
+  // TODO: These are not thread safe, add a lock if we get parallel sweeping.
+  void RecordFree(uint64_t freed_objects, int64_t freed_bytes);
+  void RecordFreeLargeObjects(uint64_t freed_objects, int64_t freed_bytes);
+
   static constexpr size_t kPauseBucketSize = 500;
   static constexpr size_t kPauseBucketCount = 32;
 
@@ -158,13 +163,14 @@
   Histogram<uint64_t> pause_histogram_;
   uint64_t total_time_ns_;
   uint64_t total_freed_objects_;
-  uint64_t total_freed_bytes_;
+  int64_t total_freed_bytes_;
 
-  // Single GC statitstics.
-  AtomicInteger freed_bytes_;
-  AtomicInteger freed_large_object_bytes_;
-  AtomicInteger freed_objects_;
-  AtomicInteger freed_large_objects_;
+  // Single GC statistics; freed bytes are signed since the GC can free negative bytes if it
+  // promotes objects to a space which has a larger allocation size.
+  int64_t freed_bytes_;
+  int64_t freed_large_object_bytes_;
+  uint64_t freed_objects_;
+  uint64_t freed_large_objects_;
 
   CumulativeLogger cumulative_timings_;
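RecordFree()/RecordFreeLargeObjects() centralize what each collector previously did with FetchAndAdd plus a separate Heap::RecordFree call, and the freed-byte counters become signed because a moving collector can "free" a negative number of bytes when objects are promoted into a space with a larger allocation size. A small sketch of the accumulation (HeapSketch stands in for gc::Heap):

// Sketch of signed freed-byte accounting; HeapSketch stands in for gc::Heap.
#include <cstdint>
#include <cstdio>

struct HeapSketch {
  int64_t num_bytes_allocated = 10 * 1024 * 1024;
  void RecordFree(uint64_t /*objects*/, int64_t bytes) {
    num_bytes_allocated -= bytes;  // bytes may be negative after promotion
  }
};

class CollectorSketch {
 public:
  explicit CollectorSketch(HeapSketch* heap) : heap_(heap) {}
  void RecordFree(uint64_t freed_objects, int64_t freed_bytes) {
    freed_objects_ += freed_objects;
    freed_bytes_ += freed_bytes;
    heap_->RecordFree(freed_objects, freed_bytes);
  }
  int64_t GetFreedBytes() const { return freed_bytes_; }
 private:
  HeapSketch* heap_;
  uint64_t freed_objects_ = 0;
  int64_t freed_bytes_ = 0;
};

int main() {
  HeapSketch heap;
  CollectorSketch collector(&heap);
  collector.RecordFree(100, 64 * 1024);   // normal sweep
  collector.RecordFree(10, -8 * 1024);    // promotion grew the footprint
  std::printf("collector freed %lld bytes, heap now %lld bytes\n",
              static_cast<long long>(collector.GetFreedBytes()),
              static_cast<long long>(heap.num_bytes_allocated));
  return 0;
}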
 
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index e225d5a..43331c3 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -31,6 +31,7 @@
 #include "gc/accounting/mod_union_table.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
+#include "gc/reference_processor.h"
 #include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
@@ -98,31 +99,32 @@
     : GarbageCollector(heap,
                        name_prefix +
                        (is_concurrent ? "concurrent mark sweep": "mark sweep")),
+      current_space_bitmap_(nullptr), mark_bitmap_(nullptr), mark_stack_(nullptr),
       gc_barrier_(new Barrier(0)),
       mark_stack_lock_("mark sweep mark stack lock", kMarkSweepMarkStackLock),
-      is_concurrent_(is_concurrent) {
+      is_concurrent_(is_concurrent), live_stack_freeze_size_(0) {
 }
 
 void MarkSweep::InitializePhase() {
   TimingLogger::ScopedSplit split("InitializePhase", &timings_);
-  mark_stack_ = heap_->mark_stack_.get();
+  mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
   immune_region_.Reset();
-  class_count_ = 0;
-  array_count_ = 0;
-  other_count_ = 0;
-  large_object_test_ = 0;
-  large_object_mark_ = 0;
-  overhead_time_ = 0;
-  work_chunks_created_ = 0;
-  work_chunks_deleted_ = 0;
-  reference_count_ = 0;
-  mark_null_count_ = 0;
-  mark_immune_count_ = 0;
-  mark_fastpath_count_ = 0;
-  mark_slowpath_count_ = 0;
+  class_count_.StoreRelaxed(0);
+  array_count_.StoreRelaxed(0);
+  other_count_.StoreRelaxed(0);
+  large_object_test_.StoreRelaxed(0);
+  large_object_mark_.StoreRelaxed(0);
+  overhead_time_.StoreRelaxed(0);
+  work_chunks_created_.StoreRelaxed(0);
+  work_chunks_deleted_.StoreRelaxed(0);
+  reference_count_.StoreRelaxed(0);
+  mark_null_count_.StoreRelaxed(0);
+  mark_immune_count_.StoreRelaxed(0);
+  mark_fastpath_count_.StoreRelaxed(0);
+  mark_slowpath_count_.StoreRelaxed(0);
   {
-    // TODO: I don't think we should need heap bitmap lock to get the mark bitmap.
+    // TODO: I don't think we should need heap bitmap lock to get the mark bitmap.
     ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
     mark_bitmap_ = heap_->GetMarkBitmap();
   }
@@ -166,18 +168,9 @@
 void MarkSweep::ProcessReferences(Thread* self) {
   TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  GetHeap()->ProcessReferences(timings_, clear_soft_references_, &IsMarkedCallback,
-                               &MarkObjectCallback, &ProcessMarkStackPausedCallback, this);
-}
-
-void MarkSweep::PreProcessReferences() {
-  if (IsConcurrent()) {
-    // No reason to do this for non-concurrent GC since pre processing soft references only helps
-    // pauses.
-    timings_.NewSplit("PreProcessReferences");
-    GetHeap()->ProcessSoftReferences(timings_, clear_soft_references_, &IsMarkedCallback,
-                                     &MarkObjectCallback, &ProcessMarkStackPausedCallback, this);
-  }
+  GetHeap()->GetReferenceProcessor()->ProcessReferences(
+      true, &timings_, clear_soft_references_, &IsMarkedCallback, &MarkObjectCallback,
+      &ProcessMarkStackCallback, this);
 }
 
 void MarkSweep::PausePhase() {
@@ -192,7 +185,6 @@
     // Scan dirty objects, this is only required if we are not doing concurrent GC.
     RecursiveMarkDirtyObjects(true, accounting::CardTable::kCardDirty);
   }
-  ProcessReferences(self);
   {
     TimingLogger::ScopedSplit split("SwapStacks", &timings_);
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
@@ -210,6 +202,9 @@
   // incorrectly sweep it. This also fixes a race where interning may attempt to return a strong
   // reference to a string that is about to be swept.
   Runtime::Current()->DisallowNewSystemWeaks();
+  // Enable the reference processing slow path, needs to be done with mutators paused since there
+  // is no lock in the GetReferent fast path.
+  GetHeap()->GetReferenceProcessor()->EnableSlowPath();
 }
 
 void MarkSweep::PreCleanCards() {
@@ -265,7 +260,6 @@
   MarkReachableObjects();
   // Pre-clean dirtied cards to reduce pauses.
   PreCleanCards();
-  PreProcessReferences();
 }
 
 void MarkSweep::UpdateAndMarkModUnion() {
@@ -290,6 +284,8 @@
 void MarkSweep::ReclaimPhase() {
   TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
   Thread* self = Thread::Current();
+  // Process the references concurrently.
+  ProcessReferences(self);
   SweepSystemWeaks(self);
   Runtime::Current()->AllowNewSystemWeaks();
   {
@@ -383,7 +379,7 @@
       ++mark_sweep_->large_object_mark_;
     }
     space::LargeObjectSpace* large_object_space = mark_sweep_->GetHeap()->GetLargeObjectsSpace();
-    if (UNLIKELY(!IsAligned<kPageSize>(obj) ||
+    if (UNLIKELY(obj == nullptr || !IsAligned<kPageSize>(obj) ||
                  (kIsDebugBuild && !large_object_space->Contains(obj)))) {
       LOG(ERROR) << "Tried to mark " << obj << " not contained by any spaces";
       LOG(ERROR) << "Attempting see if it's a bad root";
@@ -601,7 +597,7 @@
         if (kUseFinger) {
           android_memory_barrier();
           if (reinterpret_cast<uintptr_t>(ref) >=
-              static_cast<uintptr_t>(mark_sweep_->atomic_finger_)) {
+              static_cast<uintptr_t>(mark_sweep_->atomic_finger_.LoadRelaxed())) {
             return;
           }
         }
@@ -886,7 +882,7 @@
           // This function does not handle heap end increasing, so we must use the space end.
           uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
           uintptr_t end = reinterpret_cast<uintptr_t>(space->End());
-          atomic_finger_ = static_cast<int32_t>(0xFFFFFFFF);
+          atomic_finger_.StoreRelaxed(AtomicInteger::MaxValue());
 
           // Create a few worker tasks.
           const size_t n = thread_count * 2;
@@ -1116,13 +1112,10 @@
   timings_.EndSplit();
 
   timings_.StartSplit("RecordFree");
-  VLOG(heap) << "Freed " << freed_objects << "/" << count
-             << " objects with size " << PrettySize(freed_bytes);
-  heap_->RecordFree(freed_objects + freed_large_objects, freed_bytes + freed_large_object_bytes);
-  freed_objects_.FetchAndAdd(freed_objects);
-  freed_large_objects_.FetchAndAdd(freed_large_objects);
-  freed_bytes_.FetchAndAdd(freed_bytes);
-  freed_large_object_bytes_.FetchAndAdd(freed_large_object_bytes);
+  VLOG(heap) << "Freed " << freed_objects << "/" << count << " objects with size "
+             << PrettySize(freed_bytes);
+  RecordFree(freed_objects, freed_bytes);
+  RecordFreeLargeObjects(freed_large_objects, freed_large_object_bytes);
   timings_.EndSplit();
 
   timings_.StartSplit("ResetStack");
@@ -1150,9 +1143,7 @@
       size_t freed_objects = 0;
       size_t freed_bytes = 0;
       alloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
-      heap_->RecordFree(freed_objects, freed_bytes);
-      freed_objects_.FetchAndAdd(freed_objects);
-      freed_bytes_.FetchAndAdd(freed_bytes);
+      RecordFree(freed_objects, freed_bytes);
     }
   }
   SweepLargeObjects(swap_bitmaps);
@@ -1163,9 +1154,7 @@
   size_t freed_objects = 0;
   size_t freed_bytes = 0;
   heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
-  freed_large_objects_.FetchAndAdd(freed_objects);
-  freed_large_object_bytes_.FetchAndAdd(freed_bytes);
-  heap_->RecordFree(freed_objects, freed_bytes);
+  RecordFreeLargeObjects(freed_objects, freed_bytes);
 }
 
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
@@ -1175,7 +1164,7 @@
   if (kCountJavaLangRefs) {
     ++reference_count_;
   }
-  heap_->DelayReferenceReferent(klass, ref, IsMarkedCallback, this);
+  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, IsMarkedCallback, this);
 }
 
 class MarkObjectVisitor {
@@ -1205,8 +1194,8 @@
   ScanObjectVisit(obj, mark_visitor, ref_visitor);
 }
 
-void MarkSweep::ProcessMarkStackPausedCallback(void* arg) {
-  reinterpret_cast<MarkSweep*>(arg)->ProcessMarkStack(true);
+void MarkSweep::ProcessMarkStackCallback(void* arg) {
+  reinterpret_cast<MarkSweep*>(arg)->ProcessMarkStack(false);
 }
 
 void MarkSweep::ProcessMarkStackParallel(size_t thread_count) {
@@ -1226,7 +1215,9 @@
   thread_pool->Wait(self, true, true);
   thread_pool->StopWorkers(self);
   mark_stack_->Reset();
-  CHECK_EQ(work_chunks_created_, work_chunks_deleted_) << " some of the work chunks were leaked";
+  CHECK_EQ(work_chunks_created_.LoadSequentiallyConsistent(),
+           work_chunks_deleted_.LoadSequentiallyConsistent())
+      << " some of the work chunks were leaked";
 }
 
 // Scan anything that's on the mark stack.
@@ -1281,24 +1272,27 @@
 void MarkSweep::FinishPhase() {
   TimingLogger::ScopedSplit split("FinishPhase", &timings_);
   if (kCountScannedTypes) {
-    VLOG(gc) << "MarkSweep scanned classes=" << class_count_ << " arrays=" << array_count_
-             << " other=" << other_count_;
+    VLOG(gc) << "MarkSweep scanned classes=" << class_count_.LoadRelaxed()
+        << " arrays=" << array_count_.LoadRelaxed() << " other=" << other_count_.LoadRelaxed();
   }
   if (kCountTasks) {
-    VLOG(gc) << "Total number of work chunks allocated: " << work_chunks_created_;
+    VLOG(gc) << "Total number of work chunks allocated: " << work_chunks_created_.LoadRelaxed();
   }
   if (kMeasureOverhead) {
-    VLOG(gc) << "Overhead time " << PrettyDuration(overhead_time_);
+    VLOG(gc) << "Overhead time " << PrettyDuration(overhead_time_.LoadRelaxed());
   }
   if (kProfileLargeObjects) {
-    VLOG(gc) << "Large objects tested " << large_object_test_ << " marked " << large_object_mark_;
+    VLOG(gc) << "Large objects tested " << large_object_test_.LoadRelaxed()
+        << " marked " << large_object_mark_.LoadRelaxed();
   }
   if (kCountJavaLangRefs) {
-    VLOG(gc) << "References scanned " << reference_count_;
+    VLOG(gc) << "References scanned " << reference_count_.LoadRelaxed();
   }
   if (kCountMarkedObjects) {
-    VLOG(gc) << "Marked: null=" << mark_null_count_ << " immune=" <<  mark_immune_count_
-        << " fastpath=" << mark_fastpath_count_ << " slowpath=" << mark_slowpath_count_;
+    VLOG(gc) << "Marked: null=" << mark_null_count_.LoadRelaxed()
+        << " immune=" <<  mark_immune_count_.LoadRelaxed()
+        << " fastpath=" << mark_fastpath_count_.LoadRelaxed()
+        << " slowpath=" << mark_slowpath_count_.LoadRelaxed();
   }
   CHECK(mark_stack_->IsEmpty());  // Ensure that the mark stack is empty.
   mark_stack_->Reset();
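The InitializePhase/FinishPhase changes above replace implicit AtomicInteger assignments and reads with explicit StoreRelaxed/LoadRelaxed calls. These are statistics counters that are only reported in FinishPhase, after the parallel work has completed, so relaxed ordering suffices and the change mainly makes that assumption visible. A rough equivalent with std::atomic:

// Sketch of relaxed statistics counters that are only read after the phase ends.
#include <atomic>
#include <cstdint>
#include <cstdio>

class MarkStatsSketch {
 public:
  void Reset() {
    class_count_.store(0, std::memory_order_relaxed);
    array_count_.store(0, std::memory_order_relaxed);
  }
  void CountClass() { class_count_.fetch_add(1, std::memory_order_relaxed); }
  void CountArray() { array_count_.fetch_add(1, std::memory_order_relaxed); }
  void Dump() const {
    // Relaxed reads are fine here: all worker threads have been joined by now.
    std::printf("classes=%u arrays=%u\n",
                class_count_.load(std::memory_order_relaxed),
                array_count_.load(std::memory_order_relaxed));
  }
 private:
  std::atomic<uint32_t> class_count_{0};
  std::atomic<uint32_t> array_count_{0};
};

int main() {
  MarkStatsSketch stats;
  stats.Reset();
  for (int i = 0; i < 3; ++i) stats.CountClass();
  stats.CountArray();
  stats.Dump();
  return 0;
}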
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index bfc70d1..d73bf3f 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_GC_COLLECTOR_MARK_SWEEP_H_
 #define ART_RUNTIME_GC_COLLECTOR_MARK_SWEEP_H_
 
+#include <memory>
+
 #include "atomic.h"
 #include "barrier.h"
 #include "base/macros.h"
@@ -26,7 +28,6 @@
 #include "immune_region.h"
 #include "object_callbacks.h"
 #include "offsets.h"
-#include "UniquePtr.h"
 
 namespace art {
 
@@ -123,10 +124,6 @@
   void ProcessReferences(Thread* self)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void PreProcessReferences()
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Update and mark references from immune spaces.
   void UpdateAndMarkModUnion()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -191,8 +188,9 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void ProcessMarkStackPausedCallback(void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+  static void ProcessMarkStackCallback(void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void MarkRootParallelCallback(mirror::Object** root, void* arg, uint32_t thread_id,
                                        RootType root_type)
@@ -267,7 +265,7 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Used to get around thread safety annotations. The call is from MarkingPhase and is guarded by
+  // Used to get around thread safety annotations. The call is from MarkingPhase and is guarded by
   // IsExclusiveHeld.
   void RevokeAllThreadLocalAllocationStacks(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
 
@@ -307,14 +305,14 @@
   AtomicInteger mark_fastpath_count_;
   AtomicInteger mark_slowpath_count_;
 
-  // Verification.
-  size_t live_stack_freeze_size_;
-
-  UniquePtr<Barrier> gc_barrier_;
+  std::unique_ptr<Barrier> gc_barrier_;
   Mutex mark_stack_lock_ ACQUIRED_AFTER(Locks::classlinker_classes_lock_);
 
   const bool is_concurrent_;
 
+  // Verification.
+  size_t live_stack_freeze_size_;
+
  private:
   friend class AddIfReachesAllocSpaceVisitor;  // Used by mod-union table.
   friend class CardScanTask;
diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h
index 55140f6..47682cc 100644
--- a/runtime/gc/collector/semi_space-inl.h
+++ b/runtime/gc/collector/semi_space-inl.h
@@ -50,7 +50,7 @@
   return reinterpret_cast<mirror::Object*>(lock_word.ForwardingAddress());
 }
 
-// Used to mark and copy objects. Any newly-marked objects who are in the from space get moved to
+// Used to mark and copy objects. Any newly-marked objects that are in the from space get moved to
 // the to-space and have their forward address updated. Objects which have been newly marked are
 // pushed on the mark stack.
 template<bool kPoisonReferences>
@@ -72,7 +72,7 @@
         forward_address = MarkNonForwardedObject(obj);
         DCHECK(forward_address != nullptr);
         // Make sure to only update the forwarding address AFTER you copy the object so that the
-        // monitor word doesn't get stomped over.
+        // monitor word doesn't get stomped over.
         obj->SetLockWord(
             LockWord::FromForwardingAddress(reinterpret_cast<size_t>(forward_address)), false);
         // Push the object onto the mark stack for later processing.
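Per the comment above, MarkObject copies the object first and only then publishes the to-space address through the from-space object's lock word, so the monitor state is not clobbered mid-copy and a later visit of the same object simply follows the forwarding address instead of copying again. A rough sketch of that forwarding protocol (the real implementation tags the 32-bit LockWord rather than using a separate field):

// Sketch of forwarding via a per-object word, as a moving GC does.
#include <cstddef>
#include <cstdint>
#include <cstdio>

struct ObjSketch {
  uintptr_t forward_word = 0;  // 0 = not yet forwarded; real ART tags the lock word instead
  int payload = 0;
};

ObjSketch* MarkObjectSketch(ObjSketch* from, ObjSketch* to_space, std::size_t* to_top) {
  if (from->forward_word != 0) {
    return reinterpret_cast<ObjSketch*>(from->forward_word);  // already copied
  }
  ObjSketch* forward = &to_space[(*to_top)++];
  *forward = *from;                                            // copy the object first ...
  from->forward_word = reinterpret_cast<uintptr_t>(forward);   // ... then publish the address
  return forward;
}

int main() {
  ObjSketch from_space[2] = {{0, 41}, {0, 7}};
  ObjSketch to_space[2];
  std::size_t to_top = 0;
  ObjSketch* first = MarkObjectSketch(&from_space[0], to_space, &to_top);
  ObjSketch* again = MarkObjectSketch(&from_space[0], to_space, &to_top);
  std::printf("same copy: %d, payload: %d, copies made: %zu\n",
              first == again, first->payload, to_top);
  return 0;
}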
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index b53ee10..d4e26ab 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -30,6 +30,7 @@
 #include "gc/accounting/remembered_set.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
+#include "gc/reference_processor.h"
 #include "gc/space/bump_pointer_space.h"
 #include "gc/space/bump_pointer_space-inl.h"
 #include "gc/space/image_space.h"
@@ -64,8 +65,8 @@
 
 static constexpr bool kProtectFromSpace = true;
 static constexpr bool kStoreStackTraces = false;
-static constexpr bool kUseBytesPromoted = true;
 static constexpr size_t kBytesPromotedThreshold = 4 * MB;
+static constexpr size_t kLargeObjectBytesAllocatedThreshold = 16 * MB;
 
 void SemiSpace::BindBitmaps() {
   timings_.StartSplit("BindBitmaps");
@@ -104,8 +105,8 @@
       last_gc_to_space_end_(nullptr),
       bytes_promoted_(0),
       bytes_promoted_since_last_whole_heap_collection_(0),
+      large_object_bytes_allocated_at_last_whole_heap_collection_(0),
       whole_heap_collection_(true),
-      whole_heap_collection_interval_counter_(0),
       collector_name_(name_),
       swap_semi_spaces_(true) {
 }
@@ -141,7 +142,7 @@
 
 void SemiSpace::InitializePhase() {
   TimingLogger::ScopedSplit split("InitializePhase", &timings_);
-  mark_stack_ = heap_->mark_stack_.get();
+  mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
   immune_region_.Reset();
   is_large_object_space_immune_ = false;
@@ -153,7 +154,7 @@
   // Set the initial bitmap.
   to_space_live_bitmap_ = to_space_->GetLiveBitmap();
   {
-    // TODO: I don't think we should need heap bitmap lock to get the mark bitmap.
+    // TODO: I don't think we should need heap bitmap lock to get the mark bitmap.
     ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
     mark_bitmap_ = heap_->GetMarkBitmap();
   }
@@ -162,15 +163,16 @@
 void SemiSpace::ProcessReferences(Thread* self) {
   TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  GetHeap()->ProcessReferences(timings_, clear_soft_references_, &MarkedForwardingAddressCallback,
-                               &MarkObjectCallback, &ProcessMarkStackCallback, this);
+  GetHeap()->GetReferenceProcessor()->ProcessReferences(
+      false, &timings_, clear_soft_references_, &MarkedForwardingAddressCallback,
+      &MarkObjectCallback, &ProcessMarkStackCallback, this);
 }
 
 void SemiSpace::MarkingPhase() {
   CHECK(Locks::mutator_lock_->IsExclusiveHeld(self_));
   if (kStoreStackTraces) {
     Locks::mutator_lock_->AssertExclusiveHeld(self_);
-    // Store the stack traces into the runtime fault string in case we get a heap corruption
+    // Store the stack traces into the runtime fault string in case we get a heap corruption
     // related crash later.
     ThreadState old_state = self_->SetStateUnsafe(kRunnable);
     std::ostringstream oss;
@@ -187,12 +189,8 @@
     if (gc_cause_ == kGcCauseExplicit || gc_cause_ == kGcCauseForNativeAlloc ||
         clear_soft_references_) {
       // If an explicit, native allocation-triggered, or last attempt
-      // collection, collect the whole heap (and reset the interval
-      // counter to be consistent.)
+      // collection, collect the whole heap.
       whole_heap_collection_ = true;
-      if (!kUseBytesPromoted) {
-        whole_heap_collection_interval_counter_ = 0;
-      }
     }
     if (whole_heap_collection_) {
       VLOG(heap) << "Whole heap collection";
@@ -233,7 +231,7 @@
   BindBitmaps();
   // Process dirty cards and add dirty cards to mod-union tables.
   heap_->ProcessCards(timings_, kUseRememberedSet && generational_);
-  // Clear the whole card table since we can not get any additional dirty cards during the
+  // Clear the whole card table since we cannot get any additional dirty cards during the
   // paused GC. This saves memory but only works for pause the world collectors.
   timings_.NewSplit("ClearCardTable");
   heap_->GetCardTable()->ClearCardTable();
@@ -262,27 +260,18 @@
   // before they are properly counted.
   RevokeAllThreadLocalBuffers();
   // Record freed memory.
-  uint64_t from_bytes = from_space_->GetBytesAllocated();
-  uint64_t to_bytes = bytes_moved_;
-  uint64_t from_objects = from_space_->GetObjectsAllocated();
-  uint64_t to_objects = objects_moved_;
+  const int64_t from_bytes = from_space_->GetBytesAllocated();
+  const int64_t to_bytes = bytes_moved_;
+  const uint64_t from_objects = from_space_->GetObjectsAllocated();
+  const uint64_t to_objects = objects_moved_;
   CHECK_LE(to_objects, from_objects);
-  int64_t freed_bytes = from_bytes - to_bytes;
-  int64_t freed_objects = from_objects - to_objects;
-  freed_bytes_.FetchAndAdd(freed_bytes);
-  freed_objects_.FetchAndAdd(freed_objects);
   // Note: Freed bytes can be negative if we copy form a compacted space to a free-list backed
   // space.
-  heap_->RecordFree(freed_objects, freed_bytes);
-
+  RecordFree(from_objects - to_objects, from_bytes - to_bytes);
   // Clear and protect the from space.
   from_space_->Clear();
-  VLOG(heap) << "Protecting space " << *from_space_;
-  if (kProtectFromSpace) {
-    from_space_->GetMemMap()->Protect(PROT_NONE);
-  } else {
-    from_space_->GetMemMap()->Protect(PROT_READ);
-  }
+  VLOG(heap) << "Protecting from_space_: " << *from_space_;
+  from_space_->GetMemMap()->Protect(kProtectFromSpace ? PROT_NONE : PROT_READ);
   if (swap_semi_spaces_) {
     heap_->SwapSemiSpaces();
   }
@@ -601,6 +590,7 @@
     // If it's allocated after the last GC (younger), copy it to the to-space.
     forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr);
   }
+  CHECK(forward_address != nullptr) << "Out of memory in the to-space.";
   ++objects_moved_;
   bytes_moved_ += bytes_allocated;
   // Copy over the object and add it to the mark stack since we still need to update its
@@ -687,9 +677,7 @@
       size_t freed_objects = 0;
       size_t freed_bytes = 0;
       alloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
-      heap_->RecordFree(freed_objects, freed_bytes);
-      freed_objects_.FetchAndAdd(freed_objects);
-      freed_bytes_.FetchAndAdd(freed_bytes);
+      RecordFree(freed_objects, freed_bytes);
     }
   }
   if (!is_large_object_space_immune_) {
@@ -703,15 +691,14 @@
   size_t freed_objects = 0;
   size_t freed_bytes = 0;
   heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
-  freed_large_objects_.FetchAndAdd(freed_objects);
-  freed_large_object_bytes_.FetchAndAdd(freed_bytes);
-  heap_->RecordFree(freed_objects, freed_bytes);
+  RecordFreeLargeObjects(freed_objects, freed_bytes);
 }
 
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
 // marked, put it on the appropriate list in the heap for later processing.
 void SemiSpace::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) {
-  heap_->DelayReferenceReferent(klass, reference, MarkedForwardingAddressCallback, this);
+  heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, reference,
+                                                         MarkedForwardingAddressCallback, this);
 }
 
 class SemiSpaceMarkObjectVisitor {
@@ -811,32 +798,27 @@
     // only space collection at the next collection by updating
     // whole_heap_collection.
     if (!whole_heap_collection_) {
-      if (!kUseBytesPromoted) {
-        // Enable whole_heap_collection once every
-        // kDefaultWholeHeapCollectionInterval collections.
-        --whole_heap_collection_interval_counter_;
-        DCHECK_GE(whole_heap_collection_interval_counter_, 0);
-        if (whole_heap_collection_interval_counter_ == 0) {
-          whole_heap_collection_ = true;
-        }
-      } else {
-        // Enable whole_heap_collection if the bytes promoted since
-        // the last whole heap collection exceeds a threshold.
-        bytes_promoted_since_last_whole_heap_collection_ += bytes_promoted_;
-        if (bytes_promoted_since_last_whole_heap_collection_ >= kBytesPromotedThreshold) {
-          whole_heap_collection_ = true;
-        }
+      // Enable whole_heap_collection if the bytes promoted since the
+      // last whole heap collection or the large object bytes
+      // allocated exceed a threshold.
+      bytes_promoted_since_last_whole_heap_collection_ += bytes_promoted_;
+      bool bytes_promoted_threshold_exceeded =
+          bytes_promoted_since_last_whole_heap_collection_ >= kBytesPromotedThreshold;
+      uint64_t current_los_bytes_allocated = GetHeap()->GetLargeObjectsSpace()->GetBytesAllocated();
+      uint64_t last_los_bytes_allocated =
+          large_object_bytes_allocated_at_last_whole_heap_collection_;
+      bool large_object_bytes_threshold_exceeded =
+          current_los_bytes_allocated >=
+          last_los_bytes_allocated + kLargeObjectBytesAllocatedThreshold;
+      if (bytes_promoted_threshold_exceeded || large_object_bytes_threshold_exceeded) {
+        whole_heap_collection_ = true;
       }
     } else {
-      if (!kUseBytesPromoted) {
-        DCHECK_EQ(whole_heap_collection_interval_counter_, 0);
-        whole_heap_collection_interval_counter_ = kDefaultWholeHeapCollectionInterval;
-        whole_heap_collection_ = false;
-      } else {
-        // Reset it.
-        bytes_promoted_since_last_whole_heap_collection_ = bytes_promoted_;
-        whole_heap_collection_ = false;
-      }
+      // Reset the counters.
+      bytes_promoted_since_last_whole_heap_collection_ = bytes_promoted_;
+      large_object_bytes_allocated_at_last_whole_heap_collection_ =
+          GetHeap()->GetLargeObjectsSpace()->GetBytesAllocated();
+      whole_heap_collection_ = false;
     }
   }
   // Clear all of the spaces' mark bitmaps.
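
Taken together, the generational mode now switches to a whole-heap collection when either the bytes promoted into the non-moving space or the bytes newly allocated in the large object space cross a threshold since the last whole-heap collection. A minimal sketch of that predicate (the constants and the function name below are illustrative placeholders, not ART's actual thresholds):

    #include <cstdint>

    // Illustrative values only; the real thresholds are defined in semi_space.cc.
    constexpr uint64_t kBytesPromotedThresholdSketch = 4 * 1024 * 1024;
    constexpr uint64_t kLargeObjectBytesAllocatedThresholdSketch = 16 * 1024 * 1024;

    bool ShouldCollectWholeHeap(uint64_t bytes_promoted_since_last_whole_heap,
                                uint64_t current_los_bytes_allocated,
                                uint64_t los_bytes_allocated_at_last_whole_heap) {
      const bool promoted_exceeded =
          bytes_promoted_since_last_whole_heap >= kBytesPromotedThresholdSketch;
      const bool los_exceeded =
          current_los_bytes_allocated >=
          los_bytes_allocated_at_last_whole_heap + kLargeObjectBytesAllocatedThresholdSketch;
      return promoted_exceeded || los_exceeded;
    }
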
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 3b3e1b1..a95abe4 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_H_
 #define ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_H_
 
+#include <memory>
+
 #include "atomic.h"
 #include "base/macros.h"
 #include "base/mutex.h"
@@ -25,7 +27,6 @@
 #include "immune_region.h"
 #include "object_callbacks.h"
 #include "offsets.h"
-#include "UniquePtr.h"
 
 namespace art {
 
@@ -234,15 +235,15 @@
   // the non-moving space, since the last whole heap collection.
   uint64_t bytes_promoted_since_last_whole_heap_collection_;
 
+  // Used for the generational mode. Keeps track of how many bytes of
+  // large objects were allocated at the last whole heap collection.
+  uint64_t large_object_bytes_allocated_at_last_whole_heap_collection_;
+
   // Used for the generational mode. When true, collect the whole
   // heap. When false, collect only the bump pointer spaces.
   bool whole_heap_collection_;
 
-  // Used for the generational mode. A counter used to enable
-  // whole_heap_collection_ once per interval.
-  int whole_heap_collection_interval_counter_;
-
-  // How many objects and bytes we moved, used so that we don't need to get the size of the
+  // How many objects and bytes we moved, used so that we don't need to get the size of the
   // to_space_ when calculating how many objects and bytes we freed.
   size_t bytes_moved_;
   size_t objects_moved_;
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index ce51ac5..5a58446 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -49,7 +49,7 @@
 
 void StickyMarkSweep::MarkReachableObjects() {
   // All reachable objects must be referenced by a root or a dirty card, so we can clear the mark
-  // stack here since all objects in the mark stack will get scanned by the card scanning anyways.
+  // stack here since all objects in the mark stack will get scanned by the card scanning anyway.
   // TODO: Not put these objects in the mark stack in the first place.
   mark_stack_->Reset();
   RecursiveMarkDirtyObjects(false, accounting::CardTable::kCardDirty - 1);
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index b25f7ff..9e73f14 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -29,7 +29,9 @@
     case kGcCauseBackground: return "Background";
     case kGcCauseExplicit: return "Explicit";
     case kGcCauseForNativeAlloc: return "NativeAlloc";
-    case kGcCauseCollectorTransition: return" CollectorTransition";
+    case kGcCauseCollectorTransition: return "CollectorTransition";
+    case kGcCauseDisableMovingGc: return "DisableMovingGc";
+    case kGcCauseTrim: return "HeapTrim";
     default:
       LOG(FATAL) << "Unreachable";
   }
diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h
index 7499b9e..10e6667 100644
--- a/runtime/gc/gc_cause.h
+++ b/runtime/gc/gc_cause.h
@@ -35,6 +35,10 @@
   kGcCauseForNativeAlloc,
   // GC triggered for a collector transition.
   kGcCauseCollectorTransition,
+  // Not a real GC cause, used when we disable moving GC (currently for GetPrimitiveArrayCritical).
+  kGcCauseDisableMovingGc,
+  // Not a real GC cause, used when we trim the heap.
+  kGcCauseTrim,
 };
 
 const char* PrettyCause(GcCause cause);
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index a06f272..03b72b6 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -27,7 +27,7 @@
 #include "gc/space/large_object_space.h"
 #include "gc/space/rosalloc_space-inl.h"
 #include "runtime.h"
-#include "sirt_ref-inl.h"
+#include "handle_scope-inl.h"
 #include "thread.h"
 #include "thread-inl.h"
 #include "verify_object-inl.h"
@@ -96,7 +96,7 @@
     CHECK_LE(obj->SizeOf(), usable_size);
   }
   const size_t new_num_bytes_allocated =
-      static_cast<size_t>(num_bytes_allocated_.FetchAndAdd(bytes_allocated)) + bytes_allocated;
+      static_cast<size_t>(num_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes_allocated)) + bytes_allocated;
   // TODO: Deprecate.
   if (kInstrumented) {
     if (Runtime::Current()->HasStatsEnabled()) {
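
The renamed accessors used above make the memory ordering of each atomic access explicit at the call site. A rough equivalent using standard C++ atomics (a sketch only; ART uses its own Atomic<> wrapper rather than std::atomic, and the variable and function names here are hypothetical):

    #include <atomic>
    #include <cstddef>

    std::atomic<size_t> num_bytes_allocated{0};

    void OrderingSketch() {
      // FetchAndAddSequentiallyConsistent: read-modify-write with the strongest ordering.
      size_t old_value = num_bytes_allocated.fetch_add(128, std::memory_order_seq_cst);
      // LoadRelaxed: atomic read with no ordering guarantees beyond atomicity itself.
      size_t approximate = num_bytes_allocated.load(std::memory_order_relaxed);
      // StoreRelaxed and LoadSequentiallyConsistent follow the same naming scheme.
      num_bytes_allocated.store(0, std::memory_order_relaxed);
      (void)old_value;
      (void)approximate;
    }
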
@@ -144,10 +144,10 @@
       mirror::Object** end_address;
       while (!allocation_stack_->AtomicBumpBack(kThreadLocalAllocationStackSize,
                                                 &start_address, &end_address)) {
-        // Disable verify object in SirtRef as obj isn't on the alloc stack yet.
-        SirtRefNoVerify<mirror::Object> ref(self, *obj);
+        // TODO: Add handle VerifyObject.
+        StackHandleScope<1> hs(self);
+        HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
         CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
-        *obj = ref.get();
       }
       self->SetThreadLocalAllocationStack(start_address, end_address);
       // Retry on the new thread-local allocation stack.
@@ -159,10 +159,10 @@
     // This is safe to do since the GC will never free objects which are neither in the allocation
     // stack or the live bitmap.
     while (!allocation_stack_->AtomicPushBack(*obj)) {
-      // Disable verify object in SirtRef as obj isn't on the alloc stack yet.
-      SirtRefNoVerify<mirror::Object> ref(self, *obj);
+      // TODO: Add handle VerifyObject.
+      StackHandleScope<1> hs(self);
+      HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
       CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
-      *obj = ref.get();
     }
   }
 }
@@ -264,7 +264,7 @@
     // Only if the allocation succeeded, record the time.
     if (allocated_obj != nullptr) {
       uint64_t allocation_end_time = NanoTime() / kTimeAdjust;
-      heap_->total_allocation_time_.FetchAndAdd(allocation_end_time - allocation_start_time_);
+      heap_->total_allocation_time_.FetchAndAddSequentiallyConsistent(allocation_end_time - allocation_start_time_);
     }
   }
 };
@@ -279,7 +279,7 @@
 
 template <bool kGrow>
 inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size) {
-  size_t new_footprint = num_bytes_allocated_ + alloc_size;
+  size_t new_footprint = num_bytes_allocated_.LoadSequentiallyConsistent() + alloc_size;
   if (UNLIKELY(new_footprint > max_allowed_footprint_)) {
     if (UNLIKELY(new_footprint > growth_limit_)) {
       return true;
@@ -300,11 +300,7 @@
 inline void Heap::CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated,
                                     mirror::Object** obj) {
   if (UNLIKELY(new_num_bytes_allocated >= concurrent_start_bytes_)) {
-    // The SirtRef is necessary since the calls in RequestConcurrentGC are a safepoint.
-    SirtRef<mirror::Object> ref(self, *obj);
-    RequestConcurrentGC(self);
-    // Restore obj in case it moved.
-    *obj = ref.get();
+    RequestConcurrentGCAndSaveObject(self, obj);
   }
 }
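
All of the SirtRef to StackHandleScope/HandleWrapper rewrites in this file follow one pattern: wrap the raw mirror::Object** before any call that can reach a GC safepoint, so that if a moving collector relocates the object, the caller's pointer is updated. A sketch of the shape, using the ART types exactly as they appear in the hunks above (not a standalone compilable unit, and GcSafeOperation is a made-up name):

    // Sketch only: Thread, StackHandleScope, HandleWrapper and mirror::Object are ART-internal types.
    void GcSafeOperation(Thread* self, mirror::Object** obj) {
      StackHandleScope<1> hs(self);
      // The wrapper keeps *obj visible to the GC as a root and, when it goes out of
      // scope, writes the (possibly moved) address back through obj.
      HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
      // ... anything here may suspend this thread and run a moving collection ...
    }
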
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 5d517bb..fdc4367 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -20,6 +20,7 @@
 #include <cutils/trace.h>
 
 #include <limits>
+#include <memory>
 #include <vector>
 
 #include "base/histogram-inl.h"
@@ -39,6 +40,7 @@
 #include "gc/collector/partial_mark_sweep.h"
 #include "gc/collector/semi_space.h"
 #include "gc/collector/sticky_mark_sweep.h"
+#include "gc/reference_processor.h"
 #include "gc/space/bump_pointer_space.h"
 #include "gc/space/dlmalloc_space-inl.h"
 #include "gc/space/image_space.h"
@@ -61,9 +63,8 @@
 #include "runtime.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 #include "thread_list.h"
-#include "UniquePtr.h"
 #include "well_known_classes.h"
 
 namespace art {
@@ -115,6 +116,7 @@
       long_pause_log_threshold_(long_pause_log_threshold),
       long_gc_log_threshold_(long_gc_log_threshold),
       ignore_max_footprint_(ignore_max_footprint),
+      zygote_creation_lock_("zygote creation lock", kZygoteCreationLock),
       have_zygote_space_(false),
       large_object_threshold_(std::numeric_limits<size_t>::max()),  // Starts out disabled.
       collector_type_running_(kCollectorTypeNone),
@@ -175,7 +177,7 @@
     large_object_threshold_ = kDefaultLargeObjectThreshold;
     // Background compaction is currently not supported for command line runs.
     if (background_collector_type_ != foreground_collector_type_) {
-      LOG(WARNING) << "Disabling background compaction for non zygote";
+      VLOG(heap) << "Disabling background compaction for non zygote";
       background_collector_type_ = foreground_collector_type_;
     }
   }
@@ -291,7 +293,7 @@
   }
 
   // TODO: Count objects in the image space here.
-  num_bytes_allocated_ = 0;
+  num_bytes_allocated_.StoreRelaxed(0);
 
   // Default mark stack size in bytes.
   static const size_t default_mark_stack_size = 64 * KB;
@@ -367,7 +369,7 @@
     dlmalloc_space_ = space::DlMallocSpace::CreateFromMemMap(
         mem_map, "main dlmalloc space", kDefaultStartingSize, initial_size, growth_limit, capacity,
         can_move_objects);
-    main_space_ = rosalloc_space_;
+    main_space_ = dlmalloc_space_;
     CHECK(main_space_ != nullptr) << "Failed to create dlmalloc space";
   }
   main_space_->SetFootprintLimit(main_space_->Capacity());
@@ -498,7 +500,7 @@
   MutexLock mu(self, *gc_complete_lock_);
   ++disable_moving_gc_count_;
   if (IsMovingGc(collector_type_running_)) {
-    WaitForGcToCompleteLocked(self);
+    WaitForGcToCompleteLocked(kGcCauseDisableMovingGc, self);
   }
 }
 
@@ -657,13 +659,13 @@
 
 void Heap::RegisterGCAllocation(size_t bytes) {
   if (this != nullptr) {
-    gc_memory_overhead_.FetchAndAdd(bytes);
+    gc_memory_overhead_.FetchAndAddSequentiallyConsistent(bytes);
   }
 }
 
 void Heap::RegisterGCDeAllocation(size_t bytes) {
   if (this != nullptr) {
-    gc_memory_overhead_.FetchAndSub(bytes);
+    gc_memory_overhead_.FetchAndSubSequentiallyConsistent(bytes);
   }
 }
 
@@ -698,7 +700,8 @@
     }
     collector->ResetMeasurements();
   }
-  uint64_t allocation_time = static_cast<uint64_t>(total_allocation_time_) * kTimeAdjust;
+  uint64_t allocation_time =
+      static_cast<uint64_t>(total_allocation_time_.LoadRelaxed()) * kTimeAdjust;
   if (total_duration != 0) {
     const double total_seconds = static_cast<double>(total_duration / 1000) / 1000000.0;
     os << "Total time spent in GC: " << PrettyDuration(total_duration) << "\n";
@@ -718,7 +721,7 @@
   }
   os << "Total mutator paused time: " << PrettyDuration(total_paused_time) << "\n";
   os << "Total time waiting for GC to complete: " << PrettyDuration(total_wait_time_) << "\n";
-  os << "Approximate GC data structures memory overhead: " << gc_memory_overhead_;
+  os << "Approximate GC data structures memory overhead: " << gc_memory_overhead_.LoadRelaxed();
   BaseMutex::DumpAll(os);
 }
 
@@ -771,102 +774,6 @@
   return FindDiscontinuousSpaceFromObject(obj, true);
 }
 
-struct SoftReferenceArgs {
-  IsMarkedCallback* is_marked_callback_;
-  MarkObjectCallback* mark_callback_;
-  void* arg_;
-};
-
-mirror::Object* Heap::PreserveSoftReferenceCallback(mirror::Object* obj, void* arg) {
-  SoftReferenceArgs* args = reinterpret_cast<SoftReferenceArgs*>(arg);
-  // TODO: Not preserve all soft references.
-  return args->mark_callback_(obj, args->arg_);
-}
-
-void Heap::ProcessSoftReferences(TimingLogger& timings, bool clear_soft,
-                                 IsMarkedCallback* is_marked_callback,
-                                 MarkObjectCallback* mark_object_callback,
-                                 ProcessMarkStackCallback* process_mark_stack_callback, void* arg) {
-  // Unless required to clear soft references with white references, preserve some white referents.
-  if (!clear_soft) {
-    // Don't clear for sticky GC.
-    SoftReferenceArgs soft_reference_args;
-    soft_reference_args.is_marked_callback_ = is_marked_callback;
-    soft_reference_args.mark_callback_ = mark_object_callback;
-    soft_reference_args.arg_ = arg;
-    // References with a marked referent are removed from the list.
-    soft_reference_queue_.PreserveSomeSoftReferences(&PreserveSoftReferenceCallback,
-                                                     &soft_reference_args);
-    process_mark_stack_callback(arg);
-  }
-}
-
-// Process reference class instances and schedule finalizations.
-void Heap::ProcessReferences(TimingLogger& timings, bool clear_soft,
-                             IsMarkedCallback* is_marked_callback,
-                             MarkObjectCallback* mark_object_callback,
-                             ProcessMarkStackCallback* process_mark_stack_callback, void* arg) {
-  timings.StartSplit("(Paused)ProcessReferences");
-  ProcessSoftReferences(timings, clear_soft, is_marked_callback, mark_object_callback,
-                        process_mark_stack_callback, arg);
-  // Clear all remaining soft and weak references with white referents.
-  soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
-  weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
-  timings.EndSplit();
-  // Preserve all white objects with finalize methods and schedule them for finalization.
-  timings.StartSplit("(Paused)EnqueueFinalizerReferences");
-  finalizer_reference_queue_.EnqueueFinalizerReferences(cleared_references_, is_marked_callback,
-                                                        mark_object_callback, arg);
-  process_mark_stack_callback(arg);
-  timings.EndSplit();
-  timings.StartSplit("(Paused)ProcessReferences");
-  // Clear all f-reachable soft and weak references with white referents.
-  soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
-  weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
-  // Clear all phantom references with white referents.
-  phantom_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
-  // At this point all reference queues other than the cleared references should be empty.
-  DCHECK(soft_reference_queue_.IsEmpty());
-  DCHECK(weak_reference_queue_.IsEmpty());
-  DCHECK(finalizer_reference_queue_.IsEmpty());
-  DCHECK(phantom_reference_queue_.IsEmpty());
-  timings.EndSplit();
-}
-
-// Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
-// marked, put it on the appropriate list in the heap for later processing.
-void Heap::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref,
-                                  IsMarkedCallback is_marked_callback, void* arg) {
-  // klass can be the class of the old object if the visitor already updated the class of ref.
-  DCHECK(klass->IsReferenceClass());
-  mirror::Object* referent = ref->GetReferent();
-  if (referent != nullptr) {
-    mirror::Object* forward_address = is_marked_callback(referent, arg);
-    // Null means that the object is not currently marked.
-    if (forward_address == nullptr) {
-      Thread* self = Thread::Current();
-      // TODO: Remove these locks, and use atomic stacks for storing references?
-      // We need to check that the references haven't already been enqueued since we can end up
-      // scanning the same reference multiple times due to dirty cards.
-      if (klass->IsSoftReferenceClass()) {
-        soft_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
-      } else if (klass->IsWeakReferenceClass()) {
-        weak_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
-      } else if (klass->IsFinalizerReferenceClass()) {
-        finalizer_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
-      } else if (klass->IsPhantomReferenceClass()) {
-        phantom_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
-      } else {
-        LOG(FATAL) << "Invalid reference type " << PrettyClass(klass) << " " << std::hex
-                   << klass->GetAccessFlags();
-      }
-    } else if (referent != forward_address) {
-      // Referent is already marked and we need to update it.
-      ref->SetReferent<false>(forward_address);
-    }
-  }
-}
-
 space::ImageSpace* Heap::GetImageSpace() const {
   for (const auto& space : continuous_spaces_) {
     if (space->IsImageSpace()) {
@@ -962,7 +869,7 @@
     // trimming.
     MutexLock mu(self, *gc_complete_lock_);
     // Ensure there is only one GC at a time.
-    WaitForGcToCompleteLocked(self);
+    WaitForGcToCompleteLocked(kGcCauseTrim, self);
     collector_type_running_ = kCollectorTypeHeapTrim;
   }
   uint64_t start_ns = NanoTime();
@@ -986,10 +893,16 @@
   uint64_t gc_heap_end_ns = NanoTime();
   // We never move things in the native heap, so we can finish the GC at this point.
   FinishGC(self, collector::kGcTypeNone);
+  size_t native_reclaimed = 0;
+#if defined(USE_DLMALLOC)
   // Trim the native heap.
   dlmalloc_trim(0);
-  size_t native_reclaimed = 0;
   dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
+#elif defined(USE_JEMALLOC)
+  // Jemalloc does its own internal trimming.
+#else
+  UNIMPLEMENTED(WARNING) << "Add trimming support";
+#endif
   uint64_t end_ns = NanoTime();
   VLOG(heap) << "Heap trim of managed (duration=" << PrettyDuration(gc_heap_end_ns - start_ns)
       << ", advised=" << PrettySize(managed_reclaimed) << ") and native (duration="
@@ -1116,7 +1029,7 @@
     return;
   }
   // Ignore early dawn of the universe verifications.
-  if (UNLIKELY(static_cast<size_t>(num_bytes_allocated_.Load()) < 10 * KB)) {
+  if (UNLIKELY(static_cast<size_t>(num_bytes_allocated_.LoadRelaxed()) < 10 * KB)) {
     return;
   }
   CHECK(IsAligned<kObjectAlignment>(obj)) << "Object isn't aligned: " << obj;
@@ -1143,13 +1056,13 @@
   GetLiveBitmap()->Walk(Heap::VerificationCallback, this);
 }
 
-void Heap::RecordFree(ssize_t freed_objects, ssize_t freed_bytes) {
+void Heap::RecordFree(uint64_t freed_objects, int64_t freed_bytes) {
   // Use signed comparison since freed bytes can be negative when a background compaction to
   // foreground transition occurs. This is caused by moving objects from a bump pointer space to a
   // free-list backed space, which typically increases the memory footprint due to padding and binning.
-  DCHECK_LE(freed_bytes, static_cast<ssize_t>(num_bytes_allocated_.Load()));
-  DCHECK_GE(freed_objects, 0);
-  num_bytes_allocated_.FetchAndSub(freed_bytes);
+  DCHECK_LE(freed_bytes, static_cast<int64_t>(num_bytes_allocated_.LoadRelaxed()));
+  // Note: This relies on two's complement for handling negative freed_bytes.
+  num_bytes_allocated_.FetchAndSubSequentiallyConsistent(static_cast<ssize_t>(freed_bytes));
   if (Runtime::Current()->HasStatsEnabled()) {
     RuntimeStats* thread_stats = Thread::Current()->GetStats();
     thread_stats->freed_objects += freed_objects;
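
Because a foreground transition can copy objects out of a bump pointer space into a free-list backed space, the net number of freed bytes can be negative, and the subtraction above then has to increase num_bytes_allocated_. A small standalone illustration of why the two's-complement cast does the right thing (plain C++ with std::atomic, not ART's Atomic<> type):

    #include <atomic>
    #include <cstdint>
    #include <cstdio>

    int main() {
      std::atomic<size_t> num_bytes_allocated{1000};
      const int64_t freed_bytes = -200;  // Compaction grew the footprint by 200 bytes.
      // Casting -200 to size_t yields SIZE_MAX - 199; subtracting that wraps around
      // and effectively adds 200 to the counter.
      num_bytes_allocated.fetch_sub(static_cast<size_t>(freed_bytes));
      std::printf("%zu\n", num_bytes_allocated.load());  // Prints 1200.
      return 0;
    }
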
@@ -1165,42 +1078,44 @@
                                              size_t alloc_size, size_t* bytes_allocated,
                                              size_t* usable_size,
                                              mirror::Class** klass) {
-  mirror::Object* ptr = nullptr;
   bool was_default_allocator = allocator == GetCurrentAllocator();
   DCHECK(klass != nullptr);
-  SirtRef<mirror::Class> sirt_klass(self, *klass);
+  StackHandleScope<1> hs(self);
+  HandleWrapper<mirror::Class> h(hs.NewHandleWrapper(klass));
+  klass = nullptr;  // Invalidate for safety.
   // The allocation failed. If the GC is running, block until it completes, and then retry the
   // allocation.
-  collector::GcType last_gc = WaitForGcToComplete(self);
+  collector::GcType last_gc = WaitForGcToComplete(kGcCauseForAlloc, self);
   if (last_gc != collector::kGcTypeNone) {
     // If we were the default allocator but the allocator changed while we were suspended,
     // abort the allocation.
     if (was_default_allocator && allocator != GetCurrentAllocator()) {
-      *klass = sirt_klass.get();
       return nullptr;
     }
     // A GC was in progress and we blocked, retry allocation now that memory has been freed.
-    ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated, usable_size);
+    mirror::Object* ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated,
+                                                     usable_size);
+    if (ptr != nullptr) {
+      return ptr;
+    }
   }
 
   collector::GcType tried_type = next_gc_type_;
-  if (ptr == nullptr) {
-    const bool gc_ran =
-        CollectGarbageInternal(tried_type, kGcCauseForAlloc, false) != collector::kGcTypeNone;
-    if (was_default_allocator && allocator != GetCurrentAllocator()) {
-      *klass = sirt_klass.get();
-      return nullptr;
-    }
-    if (gc_ran) {
-      ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated, usable_size);
+  const bool gc_ran =
+      CollectGarbageInternal(tried_type, kGcCauseForAlloc, false) != collector::kGcTypeNone;
+  if (was_default_allocator && allocator != GetCurrentAllocator()) {
+    return nullptr;
+  }
+  if (gc_ran) {
+    mirror::Object* ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated,
+                                                     usable_size);
+    if (ptr != nullptr) {
+      return ptr;
     }
   }
 
   // Loop through our different Gc types and try to Gc until we get enough free memory.
   for (collector::GcType gc_type : gc_plan_) {
-    if (ptr != nullptr) {
-      break;
-    }
     if (gc_type == tried_type) {
       continue;
     }
@@ -1208,40 +1123,41 @@
     const bool gc_ran =
         CollectGarbageInternal(gc_type, kGcCauseForAlloc, false) != collector::kGcTypeNone;
     if (was_default_allocator && allocator != GetCurrentAllocator()) {
-      *klass = sirt_klass.get();
       return nullptr;
     }
     if (gc_ran) {
       // Did we free sufficient memory for the allocation to succeed?
-      ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated, usable_size);
+      mirror::Object* ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated,
+                                                       usable_size);
+      if (ptr != nullptr) {
+        return ptr;
+      }
     }
   }
   // Allocations have failed after GCs;  this is an exceptional state.
-  if (ptr == nullptr) {
-    // Try harder, growing the heap if necessary.
-    ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size);
+  // Try harder, growing the heap if necessary.
+  mirror::Object* ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated,
+                                                  usable_size);
+  if (ptr != nullptr) {
+    return ptr;
   }
-  if (ptr == nullptr) {
-    // Most allocations should have succeeded by now, so the heap is really full, really fragmented,
-    // or the requested size is really big. Do another GC, collecting SoftReferences this time. The
-    // VM spec requires that all SoftReferences have been collected and cleared before throwing
-    // OOME.
-    VLOG(gc) << "Forcing collection of SoftReferences for " << PrettySize(alloc_size)
-             << " allocation";
-    // TODO: Run finalization, but this may cause more allocations to occur.
-    // We don't need a WaitForGcToComplete here either.
-    DCHECK(!gc_plan_.empty());
-    CollectGarbageInternal(gc_plan_.back(), kGcCauseForAlloc, true);
-    if (was_default_allocator && allocator != GetCurrentAllocator()) {
-      *klass = sirt_klass.get();
-      return nullptr;
-    }
-    ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size);
-    if (ptr == nullptr) {
-      ThrowOutOfMemoryError(self, alloc_size, false);
-    }
+  // Most allocations should have succeeded by now, so the heap is really full, really fragmented,
+  // or the requested size is really big. Do another GC, collecting SoftReferences this time. The
+  // VM spec requires that all SoftReferences have been collected and cleared before throwing
+  // OOME.
+  VLOG(gc) << "Forcing collection of SoftReferences for " << PrettySize(alloc_size)
+           << " allocation";
+  // TODO: Run finalization, but this may cause more allocations to occur.
+  // We don't need a WaitForGcToComplete here either.
+  DCHECK(!gc_plan_.empty());
+  CollectGarbageInternal(gc_plan_.back(), kGcCauseForAlloc, true);
+  if (was_default_allocator && allocator != GetCurrentAllocator()) {
+    return nullptr;
   }
-  *klass = sirt_klass.get();
+  ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size);
+  if (ptr == nullptr) {
+    ThrowOutOfMemoryError(self, alloc_size, false);
+  }
   return ptr;
 }
 
@@ -1404,7 +1320,7 @@
   VLOG(heap) << "TransitionCollector: " << static_cast<int>(collector_type_)
              << " -> " << static_cast<int>(collector_type);
   uint64_t start_time = NanoTime();
-  uint32_t before_allocated = num_bytes_allocated_.Load();
+  uint32_t before_allocated = num_bytes_allocated_.LoadSequentiallyConsistent();
   ThreadList* tl = Runtime::Current()->GetThreadList();
   Thread* self = Thread::Current();
   ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
@@ -1418,7 +1334,7 @@
       ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
       MutexLock mu(self, *gc_complete_lock_);
       // Ensure there is only one GC at a time.
-      WaitForGcToCompleteLocked(self);
+      WaitForGcToCompleteLocked(kGcCauseCollectorTransition, self);
       // If someone else beat us to it and changed the collector before we could, exit.
       // This is safe to do before the suspend all since we set the collector_type_running_ before
       // we exit the loop. If another thread attempts to do the heap transition before we exit,
@@ -1426,11 +1342,6 @@
       if (collector_type == collector_type_) {
         return;
       }
-      if (Runtime::Current()->IsShuttingDown(self)) {
-        // Don't allow heap transitions to happen if the runtime is shutting down since these can
-        // cause objects to get finalized.
-        return;
-      }
       // GC can be disabled if someone has called GetPrimitiveArrayCritical but not yet released it.
       if (!copying_transition || disable_moving_gc_count_ == 0) {
         // TODO: Not hard code in semi-space collector?
@@ -1440,6 +1351,12 @@
     }
     usleep(1000);
   }
+  if (Runtime::Current()->IsShuttingDown(self)) {
+    // Don't allow heap transitions to happen if the runtime is shutting down since these can
+    // cause objects to get finalized.
+    FinishGC(self, collector::kGcTypeNone);
+    return;
+  }
   tl->SuspendAll();
   switch (collector_type) {
     case kCollectorTypeSS:
@@ -1477,11 +1394,11 @@
   ChangeCollector(collector_type);
   tl->ResumeAll();
   // Can't call into java code with all threads suspended.
-  EnqueueClearedReferences();
+  reference_processor_.EnqueueClearedReferences();
   uint64_t duration = NanoTime() - start_time;
   GrowForUtilization(semi_space_collector_);
   FinishGC(self, collector::kGcTypeFull);
-  int32_t after_allocated = num_bytes_allocated_.Load();
+  int32_t after_allocated = num_bytes_allocated_.LoadSequentiallyConsistent();
   int32_t delta_allocated = before_allocated - after_allocated;
   LOG(INFO) << "Heap transition to " << process_state_ << " took "
       << PrettyDuration(duration) << " saved at least " << PrettySize(delta_allocated);
@@ -1642,7 +1559,6 @@
 
 void Heap::PreZygoteFork() {
   CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false);
-  static Mutex zygote_creation_lock_("zygote creation lock", kZygoteCreationLock);
   Thread* self = Thread::Current();
   MutexLock mu(self, zygote_creation_lock_);
   // Try to see if we have any Zygote spaces.
@@ -1819,7 +1735,7 @@
     ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
     MutexLock mu(self, *gc_complete_lock_);
     // Ensure there is only one GC at a time.
-    WaitForGcToCompleteLocked(self);
+    WaitForGcToCompleteLocked(gc_cause, self);
     compacting_gc = IsMovingGc(collector_type_);
     // GC can be disabled if someone has an outstanding GetPrimitiveArrayCritical.
     if (compacting_gc && disable_moving_gc_count_ != 0) {
@@ -1881,7 +1797,7 @@
   total_bytes_freed_ever_ += collector->GetFreedBytes();
   RequestHeapTrim();
   // Enqueue cleared references.
-  EnqueueClearedReferences();
+  reference_processor_.EnqueueClearedReferences();
   // Grow the heap so that we know when to perform the next GC.
   GrowForUtilization(collector);
   const size_t duration = collector->GetDurationNs();
@@ -1952,37 +1868,54 @@
 // Verify a reference from an object.
 class VerifyReferenceVisitor {
  public:
-  explicit VerifyReferenceVisitor(Heap* heap)
+  explicit VerifyReferenceVisitor(Heap* heap, Atomic<size_t>* fail_count, bool verify_referent)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_)
-      : heap_(heap), failed_(false) {}
+      : heap_(heap), fail_count_(fail_count), verify_referent_(verify_referent) {}
 
-  bool Failed() const {
-    return failed_;
+  size_t GetFailureCount() const {
+    return fail_count_->LoadSequentiallyConsistent();
   }
 
   void operator()(mirror::Class* klass, mirror::Reference* ref) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    this->operator()(ref, mirror::Reference::ReferentOffset(), false);
+    if (verify_referent_) {
+      VerifyReference(ref, ref->GetReferent(), mirror::Reference::ReferentOffset());
+    }
   }
 
   void operator()(mirror::Object* obj, MemberOffset offset, bool /*is_static*/) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    this->operator()(obj, obj->GetFieldObject<mirror::Object>(offset), offset);
+    VerifyReference(obj, obj->GetFieldObject<mirror::Object>(offset), offset);
   }
 
+  bool IsLive(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+    return heap_->IsLiveObjectLocked(obj, true, false, true);
+  }
+
+  static void VerifyRootCallback(mirror::Object** root, void* arg, uint32_t thread_id,
+                                 RootType root_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    VerifyReferenceVisitor* visitor = reinterpret_cast<VerifyReferenceVisitor*>(arg);
+    if (!visitor->VerifyReference(nullptr, *root, MemberOffset(0))) {
+      LOG(ERROR) << "Root " << *root << " is dead with type " << PrettyTypeOf(*root)
+          << " thread_id= " << thread_id << " root_type= " << root_type;
+    }
+  }
+
+ private:
   // TODO: Fix the no thread safety analysis.
-  void operator()(mirror::Object* obj, mirror::Object* ref, MemberOffset offset) const
+  // Returns false on failure.
+  bool VerifyReference(mirror::Object* obj, mirror::Object* ref, MemberOffset offset) const
       NO_THREAD_SAFETY_ANALYSIS {
     if (ref == nullptr || IsLive(ref)) {
       // Verify that the reference is live.
-      return;
+      return true;
     }
-    if (!failed_) {
+    if (fail_count_->FetchAndAddSequentiallyConsistent(1) == 0) {
       // Print message only on the first failure to prevent spam.
       LOG(ERROR) << "!!!!!!!!!!!!!!Heap corruption detected!!!!!!!!!!!!!!!!!!!";
-      failed_ = true;
     }
     if (obj != nullptr) {
+      // Only do this part for non-roots.
       accounting::CardTable* card_table = heap_->GetCardTable();
       accounting::ObjectStack* alloc_stack = heap_->allocation_stack_.get();
       accounting::ObjectStack* live_stack = heap_->live_stack_.get();
@@ -2061,39 +1994,29 @@
       // Search to see if any of the roots reference our reference.
       arg = const_cast<void*>(reinterpret_cast<const void*>(ref));
       Runtime::Current()->VisitRoots(&RootMatchesObjectVisitor, arg);
-    } else {
-      LOG(ERROR) << "Root " << ref << " is dead with type " << PrettyTypeOf(ref);
     }
+    return false;
   }
 
-  bool IsLive(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
-    return heap_->IsLiveObjectLocked(obj, true, false, true);
-  }
-
-  static void VerifyRoots(mirror::Object** root, void* arg, uint32_t /*thread_id*/,
-                          RootType /*root_type*/) {
-    VerifyReferenceVisitor* visitor = reinterpret_cast<VerifyReferenceVisitor*>(arg);
-    (*visitor)(nullptr, *root, MemberOffset(0));
-  }
-
- private:
   Heap* const heap_;
-  mutable bool failed_;
+  Atomic<size_t>* const fail_count_;
+  const bool verify_referent_;
 };
 
 // Verify all references within an object, for use with HeapBitmap::Visit.
 class VerifyObjectVisitor {
  public:
-  explicit VerifyObjectVisitor(Heap* heap) : heap_(heap), failed_(false) {}
+  explicit VerifyObjectVisitor(Heap* heap, Atomic<size_t>* fail_count, bool verify_referent)
+      : heap_(heap), fail_count_(fail_count), verify_referent_(verify_referent) {
+  }
 
   void operator()(mirror::Object* obj) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     // Note: we are verifying the references in obj but not obj itself, this is because obj must
     // be live or else how did we find it in the live bitmap?
-    VerifyReferenceVisitor visitor(heap_);
+    VerifyReferenceVisitor visitor(heap_, fail_count_, verify_referent_);
     // The class doesn't count as a reference but we should verify it anyways.
     obj->VisitReferences<true>(visitor, visitor);
-    failed_ = failed_ || visitor.Failed();
   }
 
   static void VisitCallback(mirror::Object* obj, void* arg)
@@ -2102,17 +2025,18 @@
     visitor->operator()(obj);
   }
 
-  bool Failed() const {
-    return failed_;
+  size_t GetFailureCount() const {
+    return fail_count_->LoadSequentiallyConsistent();
   }
 
  private:
   Heap* const heap_;
-  mutable bool failed_;
+  Atomic<size_t>* const fail_count_;
+  const bool verify_referent_;
 };
 
 // Must do this with mutators suspended since we are directly accessing the allocation stacks.
-bool Heap::VerifyHeapReferences() {
+size_t Heap::VerifyHeapReferences(bool verify_referents) {
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertExclusiveHeld(self);
   // Lets sort our allocation stacks so that we can efficiently binary search them.
@@ -2121,7 +2045,8 @@
   // Since we sorted the allocation stack content, need to revoke all
   // thread-local allocation stacks.
   RevokeAllThreadLocalAllocationStacks(self);
-  VerifyObjectVisitor visitor(this);
+  Atomic<size_t> fail_count_(0);
+  VerifyObjectVisitor visitor(this, &fail_count_, verify_referents);
   // Verify objects in the allocation stack since these will be objects which were:
   // 1. Allocated prior to the GC (pre GC verification).
   // 2. Allocated during the GC (pre sweep GC verification).
@@ -2129,8 +2054,8 @@
   // pointing to dead objects if they are not reachable.
   VisitObjects(VerifyObjectVisitor::VisitCallback, &visitor);
   // Verify the roots:
-  Runtime::Current()->VisitRoots(VerifyReferenceVisitor::VerifyRoots, &visitor);
-  if (visitor.Failed()) {
+  Runtime::Current()->VisitRoots(VerifyReferenceVisitor::VerifyRootCallback, &visitor);
+  if (visitor.GetFailureCount() > 0) {
     // Dump mod-union tables.
     for (const auto& table_pair : mod_union_tables_) {
       accounting::ModUnionTable* mod_union_table = table_pair.second;
@@ -2142,9 +2067,8 @@
       remembered_set->Dump(LOG(ERROR) << remembered_set->GetName() << ": ");
     }
     DumpSpaces();
-    return false;
   }
-  return true;
+  return visitor.GetFailureCount();
 }
 
 class VerifyReferenceCardVisitor {
@@ -2347,8 +2271,10 @@
   if (verify_pre_gc_heap_) {
     TimingLogger::ScopedSplit split("PreGcVerifyHeapReferences", timings);
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    if (!VerifyHeapReferences()) {
-      LOG(FATAL) << "Pre " << gc->GetName() << " heap verification failed";
+    size_t failures = VerifyHeapReferences();
+    if (failures > 0) {
+      LOG(FATAL) << "Pre " << gc->GetName() << " heap verification failed with " << failures
+          << " failures";
     }
   }
   // Check that all objects which reference things in the live stack are on dirty cards.
@@ -2399,8 +2325,12 @@
     // Swapping bound bitmaps does nothing.
     gc->SwapBitmaps();
     SwapSemiSpaces();
-    if (!VerifyHeapReferences()) {
-      LOG(FATAL) << "Pre sweeping " << gc->GetName() << " GC verification failed";
+    // Pass in false since concurrent reference processing can mean that the reference referents
+    // may point to dead objects at the point at which PreSweepingGcVerification is called.
+    size_t failures = VerifyHeapReferences(false);
+    if (failures > 0) {
+      LOG(FATAL) << "Pre sweeping " << gc->GetName() << " GC verification failed with " << failures
+          << " failures";
     }
     SwapSemiSpaces();
     gc->SwapBitmaps();
@@ -2425,8 +2355,10 @@
   if (verify_post_gc_heap_) {
     TimingLogger::ScopedSplit split("PostGcVerifyHeapReferences", timings);
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    if (!VerifyHeapReferences()) {
-      LOG(FATAL) << "Pre " << gc->GetName() << " heap verification failed";
+    size_t failures = VerifyHeapReferences();
+    if (failures > 0) {
+      LOG(FATAL) << "Post " << gc->GetName() << " heap verification failed with " << failures
+          << " failures";
     }
   }
 }
@@ -2448,13 +2380,13 @@
   }
 }
 
-collector::GcType Heap::WaitForGcToComplete(Thread* self) {
+collector::GcType Heap::WaitForGcToComplete(GcCause cause, Thread* self) {
   ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
   MutexLock mu(self, *gc_complete_lock_);
-  return WaitForGcToCompleteLocked(self);
+  return WaitForGcToCompleteLocked(cause, self);
 }
 
-collector::GcType Heap::WaitForGcToCompleteLocked(Thread* self) {
+collector::GcType Heap::WaitForGcToCompleteLocked(GcCause cause, Thread* self) {
   collector::GcType last_gc_type = collector::kGcTypeNone;
   uint64_t wait_start = NanoTime();
   while (collector_type_running_ != kCollectorTypeNone) {
@@ -2467,7 +2399,8 @@
   uint64_t wait_time = NanoTime() - wait_start;
   total_wait_time_ += wait_time;
   if (wait_time > long_pause_log_threshold_) {
-    LOG(INFO) << "WaitForGcToComplete blocked for " << PrettyDuration(wait_time);
+    LOG(INFO) << "WaitForGcToComplete blocked for " << PrettyDuration(wait_time)
+        << " for cause " << cause;
   }
   return last_gc_type;
 }
@@ -2503,7 +2436,7 @@
 }
 
 void Heap::UpdateMaxNativeFootprint() {
-  size_t native_size = native_bytes_allocated_;
+  size_t native_size = native_bytes_allocated_.LoadRelaxed();
   // TODO: Tune the native heap utilization to be a value other than the java heap utilization.
   size_t target_size = native_size / GetTargetHeapUtilization();
   if (target_size > native_size + max_free_) {
@@ -2611,35 +2544,26 @@
   non_moving_space_->ClearGrowthLimit();
 }
 
-void Heap::AddFinalizerReference(Thread* self, mirror::Object* object) {
+void Heap::AddFinalizerReference(Thread* self, mirror::Object** object) {
   ScopedObjectAccess soa(self);
-  ScopedLocalRef<jobject> arg(self->GetJniEnv(), soa.AddLocalReference<jobject>(object));
+  ScopedLocalRef<jobject> arg(self->GetJniEnv(), soa.AddLocalReference<jobject>(*object));
   jvalue args[1];
   args[0].l = arg.get();
   InvokeWithJValues(soa, nullptr, WellKnownClasses::java_lang_ref_FinalizerReference_add, args);
+  // Restore object in case it gets moved.
+  *object = soa.Decode<mirror::Object*>(arg.get());
 }
 
-void Heap::EnqueueClearedReferences() {
-  Thread* self = Thread::Current();
-  Locks::mutator_lock_->AssertNotHeld(self);
-  if (!cleared_references_.IsEmpty()) {
-    // When a runtime isn't started there are no reference queues to care about so ignore.
-    if (LIKELY(Runtime::Current()->IsStarted())) {
-      ScopedObjectAccess soa(self);
-      ScopedLocalRef<jobject> arg(self->GetJniEnv(),
-                                  soa.AddLocalReference<jobject>(cleared_references_.GetList()));
-      jvalue args[1];
-      args[0].l = arg.get();
-      InvokeWithJValues(soa, nullptr, WellKnownClasses::java_lang_ref_ReferenceQueue_add, args);
-    }
-    cleared_references_.Clear();
-  }
+void Heap::RequestConcurrentGCAndSaveObject(Thread* self, mirror::Object** obj) {
+  StackHandleScope<1> hs(self);
+  HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
+  RequestConcurrentGC(self);
 }
 
 void Heap::RequestConcurrentGC(Thread* self) {
   // Make sure that we can do a concurrent GC.
   Runtime* runtime = Runtime::Current();
-  if (runtime == NULL || !runtime->IsFinishedStarting() || runtime->IsShuttingDown(self) ||
+  if (runtime == nullptr || !runtime->IsFinishedStarting() || runtime->IsShuttingDown(self) ||
       self->IsHandlingStackOverflow()) {
     return;
   }
@@ -2659,7 +2583,7 @@
     return;
   }
   // Wait for any GCs currently running to finish.
-  if (WaitForGcToComplete(self) == collector::kGcTypeNone) {
+  if (WaitForGcToComplete(kGcCauseBackground, self) == collector::kGcTypeNone) {
     // If the we can't run the GC type we wanted to run, find the next appropriate one and try that
     // instead. E.g. can't do partial, so do full instead.
     if (CollectGarbageInternal(next_gc_type_, kGcCauseBackground, false) ==
@@ -2784,21 +2708,22 @@
     native_need_to_run_finalization_ = false;
   }
   // Total number of native bytes allocated.
-  native_bytes_allocated_.FetchAndAdd(bytes);
-  if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_gc_watermark_) {
+  size_t new_native_bytes_allocated = native_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes);
+  new_native_bytes_allocated += bytes;
+  if (new_native_bytes_allocated > native_footprint_gc_watermark_) {
     collector::GcType gc_type = have_zygote_space_ ? collector::kGcTypePartial :
         collector::kGcTypeFull;
 
     // The second watermark is higher than the gc watermark. If you hit this it means you are
     // allocating native objects faster than the GC can keep up with.
-    if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_limit_) {
-      if (WaitForGcToComplete(self) != collector::kGcTypeNone) {
+    if (new_native_bytes_allocated > native_footprint_limit_) {
+      if (WaitForGcToComplete(kGcCauseForNativeAlloc, self) != collector::kGcTypeNone) {
         // Just finished a GC, attempt to run finalizers.
         RunFinalization(env);
         CHECK(!env->ExceptionCheck());
       }
       // If we still are over the watermark, attempt a GC for alloc and run finalizers.
-      if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_limit_) {
+      if (new_native_bytes_allocated > native_footprint_limit_) {
         CollectGarbageInternal(gc_type, kGcCauseForNativeAlloc, false);
         RunFinalization(env);
         native_need_to_run_finalization_ = false;
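
One subtle fix in the hunk above: the watermark comparisons now use the value returned by the fetch-and-add (plus the delta) rather than re-reading native_bytes_allocated_, so concurrent registrations from other threads cannot skew this thread's decision between the two checks. Sketched with a standard atomic (illustrative only; the function name is hypothetical):

    #include <atomic>
    #include <cstddef>

    std::atomic<size_t> native_bytes_allocated{0};

    size_t RegisterNativeAllocationSketch(size_t bytes) {
      // fetch_add returns the value held *before* the addition, so this thread's
      // view of the running total is old + bytes, independent of other threads.
      const size_t new_total = native_bytes_allocated.fetch_add(bytes) + bytes;
      return new_total;  // Compare against the GC watermarks instead of a fresh load.
    }
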
@@ -2820,7 +2745,7 @@
 void Heap::RegisterNativeFree(JNIEnv* env, int bytes) {
   int expected_size, new_size;
   do {
-    expected_size = native_bytes_allocated_.Load();
+    expected_size = native_bytes_allocated_.LoadRelaxed();
     new_size = expected_size - bytes;
     if (UNLIKELY(new_size < 0)) {
       ScopedObjectAccess soa(env);
@@ -2829,7 +2754,7 @@
                                  "registered as allocated", bytes, expected_size).c_str());
       break;
     }
-  } while (!native_bytes_allocated_.CompareAndSwap(expected_size, new_size));
+  } while (!native_bytes_allocated_.CompareExchangeWeakRelaxed(expected_size, new_size));
 }
 
 size_t Heap::GetTotalMemory() const {
@@ -2856,7 +2781,7 @@
 void Heap::CheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count) {
   CHECK(c == NULL || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) ||
         (c->IsVariableSize() || c->GetObjectSize() == byte_count) ||
-        strlen(ClassHelper(c).GetDescriptor()) == 0);
+        c->GetDescriptor().empty());
   CHECK_GE(byte_count, sizeof(mirror::Object));
 }
 
@@ -2864,9 +2789,9 @@
   CHECK(remembered_set != nullptr);
   space::Space* space = remembered_set->GetSpace();
   CHECK(space != nullptr);
-  CHECK(remembered_sets_.find(space) == remembered_sets_.end());
+  CHECK(remembered_sets_.find(space) == remembered_sets_.end()) << space;
   remembered_sets_.Put(space, remembered_set);
-  CHECK(remembered_sets_.find(space) != remembered_sets_.end());
+  CHECK(remembered_sets_.find(space) != remembered_sets_.end()) << space;
 }
 
 void Heap::RemoveRememberedSet(space::Space* space) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index d3b5cdc..887b17e 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -35,7 +35,7 @@
 #include "jni.h"
 #include "object_callbacks.h"
 #include "offsets.h"
-#include "reference_queue.h"
+#include "reference_processor.h"
 #include "safe_map.h"
 #include "thread_pool.h"
 #include "verify_object.h"
@@ -54,6 +54,9 @@
 }  // namespace mirror
 
 namespace gc {
+
+class ReferenceProcessor;
+
 namespace accounting {
   class HeapBitmap;
   class ModUnionTable;
@@ -106,8 +109,6 @@
 };
 std::ostream& operator<<(std::ostream& os, const ProcessState& process_state);
 
-std::ostream& operator<<(std::ostream& os, const RootType& root_type);
-
 class Heap {
  public:
   // If true, measure the total allocation time.
@@ -141,7 +142,7 @@
                 size_t max_free, double target_utilization,
                 double foreground_heap_growth_multiplier, size_t capacity,
                 const std::string& original_image_file_name,
-                const InstructionSet image_instruction_set,
+                InstructionSet image_instruction_set,
                 CollectorType foreground_collector_type, CollectorType background_collector_type,
                 size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
                 size_t long_pause_threshold, size_t long_gc_threshold,
@@ -215,7 +216,8 @@
 
   // Check sanity of all live references.
   void VerifyHeap() LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
-  bool VerifyHeapReferences()
+  // Returns how many failures occurred.
+  size_t VerifyHeapReferences(bool verify_referents = true)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
   bool VerifyMissingCardMarks()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
@@ -300,7 +302,8 @@
 
   // Blocks the caller until the garbage collector becomes idle and returns the type of GC we
   // waited for.
-  collector::GcType WaitForGcToComplete(Thread* self) LOCKS_EXCLUDED(gc_complete_lock_);
+  collector::GcType WaitForGcToComplete(GcCause cause, Thread* self)
+      LOCKS_EXCLUDED(gc_complete_lock_);
 
   // Update the heap's process state to a new value, may cause compaction to occur.
   void UpdateProcessState(ProcessState process_state);
@@ -313,21 +316,6 @@
     return discontinuous_spaces_;
   }
 
-  static mirror::Object* PreserveSoftReferenceCallback(mirror::Object* obj, void* arg);
-  void ProcessSoftReferences(TimingLogger& timings, bool clear_soft,
-                             IsMarkedCallback* is_marked_callback,
-                             MarkObjectCallback* mark_object_callback,
-                             ProcessMarkStackCallback* process_mark_stack_callback, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-  void ProcessReferences(TimingLogger& timings, bool clear_soft,
-                         IsMarkedCallback* is_marked_callback,
-                         MarkObjectCallback* mark_object_callback,
-                         ProcessMarkStackCallback* process_mark_stack_callback,
-                         void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
   // Enable verification of object references when the runtime is sufficiently initialized.
   void EnableObjectValidation() {
     verify_object_mode_ = kVerifyObjectSupport;
@@ -357,7 +345,7 @@
 
   // Freed bytes can be negative in cases where we copy objects from a compacted space to a
   // free-list backed space.
-  void RecordFree(ssize_t freed_objects, ssize_t freed_bytes);
+  void RecordFree(uint64_t freed_objects, int64_t freed_bytes);
 
   // Must be called if a field of an Object in the heap changes, and before any GC safe-point.
   // The call is not needed if NULL is stored in the field.
@@ -380,11 +368,11 @@
     return card_table_.get();
   }
 
-  void AddFinalizerReference(Thread* self, mirror::Object* object);
+  void AddFinalizerReference(Thread* self, mirror::Object** object);
 
   // Returns the number of bytes currently allocated.
   size_t GetBytesAllocated() const {
-    return num_bytes_allocated_;
+    return num_bytes_allocated_.LoadSequentiallyConsistent();
   }
 
   // Returns the number of objects currently allocated.
@@ -420,10 +408,10 @@
 
   // Implements java.lang.Runtime.freeMemory.
   size_t GetFreeMemory() const {
-    return GetTotalMemory() - num_bytes_allocated_;
+    return GetTotalMemory() - num_bytes_allocated_.LoadSequentiallyConsistent();
   }
 
-  // Get the space that corresponds to an object's address. Current implementation searches all
+  // Get the space that corresponds to an object's address. Current implementation searches all
   // spaces in turn. If fail_ok is false then failing to find a space will cause an abort.
   // TODO: consider using faster data structure like binary tree.
   space::ContinuousSpace* FindContinuousSpaceFromObject(const mirror::Object*, bool fail_ok) const;
@@ -564,6 +552,10 @@
   }
   bool HasImageSpace() const;
 
+  ReferenceProcessor* GetReferenceProcessor() {
+    return &reference_processor_;
+  }
+
  private:
   void Compact(space::ContinuousMemMapAllocSpace* target_space,
                space::ContinuousMemMapAllocSpace* source_space)
@@ -589,6 +581,10 @@
                                        mirror::Object** obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  accounting::ObjectStack* GetMarkStack() {
+    return mark_stack_.get();
+  }
+
   // We don't force this to be inlined since it is a slow path.
   template <bool kInstrumented, typename PreFenceVisitor>
   mirror::Object* AllocLargeObject(Thread* self, mirror::Class* klass, size_t byte_count,
@@ -630,24 +626,21 @@
   bool IsValidContinuousSpaceObjectAddress(const mirror::Object* obj) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void EnqueueClearedReferences();
-  // Returns true if the reference object has not yet been enqueued.
-  void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref,
-                              IsMarkedCallback is_marked_callback, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Run the finalizers.
   void RunFinalization(JNIEnv* env);
 
   // Blocks the caller until the garbage collector becomes idle and returns the type of GC we
   // waited for.
-  collector::GcType WaitForGcToCompleteLocked(Thread* self)
+  collector::GcType WaitForGcToCompleteLocked(GcCause cause, Thread* self)
       EXCLUSIVE_LOCKS_REQUIRED(gc_complete_lock_);
 
   void RequestCollectorTransition(CollectorType desired_collector_type, uint64_t delta_time)
       LOCKS_EXCLUDED(heap_trim_request_lock_);
   void RequestHeapTrim() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
-  void RequestConcurrentGC(Thread* self) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
+  void RequestConcurrentGCAndSaveObject(Thread* self, mirror::Object** obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void RequestConcurrentGC(Thread* self)
+      LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
   bool IsGCRequestPending() const;
 
   // Sometimes CollectGarbageInternal decides to run a different Gc than you requested. Returns
@@ -739,7 +732,7 @@
   space::LargeObjectSpace* large_object_space_;
 
   // The card table, dirtied by the write barrier.
-  UniquePtr<accounting::CardTable> card_table_;
+  std::unique_ptr<accounting::CardTable> card_table_;
 
   // A mod-union table remembers all of the references from its space to other spaces.
   SafeMap<space::Space*, accounting::ModUnionTable*> mod_union_tables_;
@@ -785,6 +778,9 @@
   // useful for benchmarking since it reduces time spent in GC to a low %.
   const bool ignore_max_footprint_;
 
+  // Lock which guards zygote space creation.
+  Mutex zygote_creation_lock_;
+
   // If we have a zygote space.
   bool have_zygote_space_;
 
@@ -794,14 +790,10 @@
   // Guards access to the state of GC, associated conditional variable is used to signal when a GC
   // completes.
   Mutex* gc_complete_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  UniquePtr<ConditionVariable> gc_complete_cond_ GUARDED_BY(gc_complete_lock_);
+  std::unique_ptr<ConditionVariable> gc_complete_cond_ GUARDED_BY(gc_complete_lock_);
 
-  // Reference queues.
-  ReferenceQueue soft_reference_queue_;
-  ReferenceQueue weak_reference_queue_;
-  ReferenceQueue finalizer_reference_queue_;
-  ReferenceQueue phantom_reference_queue_;
-  ReferenceQueue cleared_references_;
+  // Reference processor.
+  ReferenceProcessor reference_processor_;
 
   // True while the garbage collector is running.
   volatile CollectorType collector_type_running_ GUARDED_BY(gc_complete_lock_);
@@ -890,7 +882,7 @@
   };
 
   // Parallel GC data structures.
-  UniquePtr<ThreadPool> thread_pool_;
+  std::unique_ptr<ThreadPool> thread_pool_;
 
   // The nanosecond time at which the last GC ended.
   uint64_t last_gc_time_ns_;
@@ -903,19 +895,19 @@
   uint64_t allocation_rate_;
 
   // For a GC cycle, a bitmap that is set corresponding to the
-  UniquePtr<accounting::HeapBitmap> live_bitmap_ GUARDED_BY(Locks::heap_bitmap_lock_);
-  UniquePtr<accounting::HeapBitmap> mark_bitmap_ GUARDED_BY(Locks::heap_bitmap_lock_);
+  std::unique_ptr<accounting::HeapBitmap> live_bitmap_ GUARDED_BY(Locks::heap_bitmap_lock_);
+  std::unique_ptr<accounting::HeapBitmap> mark_bitmap_ GUARDED_BY(Locks::heap_bitmap_lock_);
 
   // Mark stack that we reuse to avoid re-allocating the mark stack.
-  UniquePtr<accounting::ObjectStack> mark_stack_;
+  std::unique_ptr<accounting::ObjectStack> mark_stack_;
 
   // Allocation stack, new allocations go here so that we can do sticky mark bits. This enables us
   // to use the live bitmap as the old mark bitmap.
   const size_t max_allocation_stack_size_;
-  UniquePtr<accounting::ObjectStack> allocation_stack_;
+  std::unique_ptr<accounting::ObjectStack> allocation_stack_;
 
   // Second allocation stack so that we can process allocation with the heap unlocked.
-  UniquePtr<accounting::ObjectStack> live_stack_;
+  std::unique_ptr<accounting::ObjectStack> live_stack_;
 
   // Allocator type.
   AllocatorType current_allocator_;
diff --git a/runtime/gc/heap_test.cc b/runtime/gc/heap_test.cc
index a85ad4d..4176f4a 100644
--- a/runtime/gc/heap_test.cc
+++ b/runtime/gc/heap_test.cc
@@ -20,7 +20,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 
 namespace art {
 namespace gc {
@@ -43,14 +43,16 @@
     ScopedObjectAccess soa(Thread::Current());
     // garbage is created during ClassLinker::Init
 
-    SirtRef<mirror::Class> c(soa.Self(), class_linker_->FindSystemClass(soa.Self(),
-                                                                        "[Ljava/lang/Object;"));
+    StackHandleScope<1> hs(soa.Self());
+    Handle<mirror::Class> c(
+        hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;")));
     for (size_t i = 0; i < 1024; ++i) {
-      SirtRef<mirror::ObjectArray<mirror::Object> > array(soa.Self(),
-          mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), c.get(), 2048));
+      StackHandleScope<1> hs(soa.Self());
+      Handle<mirror::ObjectArray<mirror::Object>> array(hs.NewHandle(
+          mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), c.Get(), 2048)));
       for (size_t j = 0; j < 2048; ++j) {
         mirror::String* string = mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello, world!");
-        // SIRT operator -> deferences the SIRT before running the method.
+        // The handle's operator-> dereferences the handle before running the method.
         array->Set<false>(j, string);
       }
     }
@@ -61,7 +63,7 @@
 TEST_F(HeapTest, HeapBitmapCapacityTest) {
   byte* heap_begin = reinterpret_cast<byte*>(0x1000);
   const size_t heap_capacity = kObjectAlignment * (sizeof(intptr_t) * 8 + 1);
-  UniquePtr<accounting::ContinuousSpaceBitmap> bitmap(
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap(
       accounting::ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
   mirror::Object* fake_end_of_heap_object =
       reinterpret_cast<mirror::Object*>(&heap_begin[heap_capacity - kObjectAlignment]);
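
The heap_test.cc changes above (and the matching ones later in this patch) follow a single pattern: SirtRef<T>(self, obj) becomes a StackHandleScope<N> plus hs.NewHandle(obj), and .get()/.reset() become .Get()/.Assign(). A rough standalone sketch of that shape is below; the classes here are hypothetical stand-ins, not the real ART types, and the type-punning is kept deliberately simple.

    #include <array>
    #include <cassert>
    #include <cstddef>

    // Hypothetical handle: an indirection slot that a GC could update in place.
    template <typename T>
    class Handle {
     public:
      explicit Handle(T** slot) : slot_(slot) {}
      T* Get() const { return *slot_; }
      T* Assign(T* value) { T* old = *slot_; *slot_ = value; return old; }
     private:
      T** slot_;
    };

    // Hypothetical fixed-capacity scope that hands out handles backed by its slots.
    template <std::size_t kCapacity>
    class StackHandleScope {
     public:
      template <typename T>
      Handle<T> NewHandle(T* object) {
        assert(used_ < kCapacity);
        slots_[used_] = object;
        return Handle<T>(reinterpret_cast<T**>(&slots_[used_++]));
      }
     private:
      std::array<void*, kCapacity> slots_{};
      std::size_t used_ = 0;
    };

    int main() {
      int value = 42;
      StackHandleScope<1> hs;
      Handle<int> h = hs.NewHandle(&value);
      assert(*h.Get() == 42);
      return 0;
    }

The point of the indirection is that the scope's slots, rather than raw pointers held by the caller, are what a moving collector would visit and update.
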
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
new file mode 100644
index 0000000..a58df8e
--- /dev/null
+++ b/runtime/gc/reference_processor.cc
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "reference_processor.h"
+
+#include "mirror/object-inl.h"
+#include "mirror/reference-inl.h"
+#include "reflection.h"
+#include "ScopedLocalRef.h"
+#include "scoped_thread_state_change.h"
+#include "well_known_classes.h"
+
+namespace art {
+namespace gc {
+
+ReferenceProcessor::ReferenceProcessor()
+    : process_references_args_(nullptr, nullptr, nullptr), slow_path_enabled_(false),
+      preserving_references_(false), lock_("reference processor lock", kReferenceProcessorLock),
+      condition_("reference processor condition", lock_) {
+}
+
+void ReferenceProcessor::EnableSlowPath() {
+  Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
+  slow_path_enabled_ = true;
+}
+
+void ReferenceProcessor::DisableSlowPath(Thread* self) {
+  slow_path_enabled_ = false;
+  condition_.Broadcast(self);
+}
+
+mirror::Object* ReferenceProcessor::GetReferent(Thread* self, mirror::Reference* reference) {
+  mirror::Object* const referent = reference->GetReferent();
+  if (LIKELY(!slow_path_enabled_)) {
+    return referent;
+  }
+  // Another fast path: if the referent is already cleared, we can just return null since there is
+  // no scenario where it becomes non-null.
+  if (referent == nullptr) {
+    return nullptr;
+  }
+  MutexLock mu(self, lock_);
+  while (slow_path_enabled_) {
+    mirror::Object* const referent = reference->GetReferent();
+    // If the referent became cleared, return it.
+    if (referent == nullptr) {
+      return nullptr;
+    }
+    // Try to see if the referent is already marked by using the is_marked_callback. We can return
+    // it to the mutator as long as the GC is not preserving references. If the GC is
+    // preserving references, the mutator could take a white field and move it somewhere else
+    // in the heap causing corruption since this field would get swept.
+    IsMarkedCallback* const is_marked_callback = process_references_args_.is_marked_callback_;
+    if (!preserving_references_ && is_marked_callback != nullptr) {
+      mirror::Object* const obj = is_marked_callback(referent, process_references_args_.arg_);
+      // If it's null, the referent is not marked, but it could still become marked if it is
+      // reachable from finalizer referents. So we cannot return in this case and must block.
+      if (obj != nullptr) {
+        return obj;
+      }
+    }
+    condition_.WaitHoldingLocks(self);
+  }
+  return reference->GetReferent();
+}
+
+mirror::Object* ReferenceProcessor::PreserveSoftReferenceCallback(mirror::Object* obj, void* arg) {
+  auto* const args = reinterpret_cast<ProcessReferencesArgs*>(arg);
+  // TODO: Not preserve all soft references.
+  return args->mark_callback_(obj, args->arg_);
+}
+
+void ReferenceProcessor::StartPreservingReferences(Thread* self) {
+  MutexLock mu(self, lock_);
+  preserving_references_ = true;
+}
+
+void ReferenceProcessor::StopPreservingReferences(Thread* self) {
+  MutexLock mu(self, lock_);
+  preserving_references_ = false;
+  // We are done preserving references; blocked waiters may now see a marked referent.
+  condition_.Broadcast(self);
+}
+
+// Process reference class instances and schedule finalizations.
+void ReferenceProcessor::ProcessReferences(bool concurrent, TimingLogger* timings,
+                                           bool clear_soft_references,
+                                           IsMarkedCallback* is_marked_callback,
+                                           MarkObjectCallback* mark_object_callback,
+                                           ProcessMarkStackCallback* process_mark_stack_callback,
+                                           void* arg) {
+  Thread* self = Thread::Current();
+  {
+    MutexLock mu(self, lock_);
+    process_references_args_.is_marked_callback_ = is_marked_callback;
+    process_references_args_.mark_callback_ = mark_object_callback;
+    process_references_args_.arg_ = arg;
+    CHECK_EQ(slow_path_enabled_, concurrent) << "Slow path must be enabled iff concurrent";
+  }
+  timings->StartSplit(concurrent ? "ProcessReferences" : "(Paused)ProcessReferences");
+  // Unless required to clear soft references with white referents, preserve some white referents.
+  if (!clear_soft_references) {
+    TimingLogger::ScopedSplit split(concurrent ? "PreserveSomeSoftReferences" :
+        "(Paused)PreserveSomeSoftReferences", timings);
+    if (concurrent) {
+      StartPreservingReferences(self);
+    }
+    // References with a marked referent are removed from the list.
+    soft_reference_queue_.PreserveSomeSoftReferences(&PreserveSoftReferenceCallback,
+                                                     &process_references_args_);
+    process_mark_stack_callback(arg);
+    if (concurrent) {
+      StopPreservingReferences(self);
+    }
+  }
+  // Clear all remaining soft and weak references with white referents.
+  soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  {
+    TimingLogger::ScopedSplit split(concurrent ? "EnqueueFinalizerReferences" :
+        "(Paused)EnqueueFinalizerReferences", timings);
+    if (concurrent) {
+      StartPreservingReferences(self);
+    }
+    // Preserve all white objects with finalize methods and schedule them for finalization.
+    finalizer_reference_queue_.EnqueueFinalizerReferences(cleared_references_, is_marked_callback,
+                                                          mark_object_callback, arg);
+    process_mark_stack_callback(arg);
+    if (concurrent) {
+      StopPreservingReferences(self);
+    }
+  }
+  // Clear all finalizer-referent-reachable soft and weak references with white referents.
+  soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  // Clear all phantom references with white referents.
+  phantom_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  // At this point all reference queues other than the cleared references should be empty.
+  DCHECK(soft_reference_queue_.IsEmpty());
+  DCHECK(weak_reference_queue_.IsEmpty());
+  DCHECK(finalizer_reference_queue_.IsEmpty());
+  DCHECK(phantom_reference_queue_.IsEmpty());
+  {
+    MutexLock mu(self, lock_);
+    // Need to always do this since the next GC may be concurrent. Doing this only for concurrent
+    // GCs could result in a stale is_marked_callback_ being called before reference processing
+    // starts, since there is a small window of time where slow_path_enabled_ is enabled but the
+    // callback isn't yet set.
+    process_references_args_.is_marked_callback_ = nullptr;
+    if (concurrent) {
+      // Done processing, disable the slow path and broadcast to the waiters.
+      DisableSlowPath(self);
+    }
+  }
+  timings->EndSplit();
+}
+
+// Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
+// marked, put it on the appropriate list in the heap for later processing.
+void ReferenceProcessor::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref,
+                                                IsMarkedCallback is_marked_callback, void* arg) {
+  // klass can be the class of the old object if the visitor already updated the class of ref.
+  DCHECK(klass->IsReferenceClass());
+  mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
+  if (referent != nullptr) {
+    mirror::Object* forward_address = is_marked_callback(referent, arg);
+    // Null means that the object is not currently marked.
+    if (forward_address == nullptr) {
+      Thread* self = Thread::Current();
+      // TODO: Remove these locks, and use atomic stacks for storing references?
+      // We need to check that the references haven't already been enqueued since we can end up
+      // scanning the same reference multiple times due to dirty cards.
+      if (klass->IsSoftReferenceClass()) {
+        soft_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
+      } else if (klass->IsWeakReferenceClass()) {
+        weak_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
+      } else if (klass->IsFinalizerReferenceClass()) {
+        finalizer_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
+      } else if (klass->IsPhantomReferenceClass()) {
+        phantom_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
+      } else {
+        LOG(FATAL) << "Invalid reference type " << PrettyClass(klass) << " " << std::hex
+                   << klass->GetAccessFlags();
+      }
+    } else if (referent != forward_address) {
+      // Referent is already marked and we need to update it.
+      ref->SetReferent<false>(forward_address);
+    }
+  }
+}
+
+void ReferenceProcessor::EnqueueClearedReferences() {
+  Thread* self = Thread::Current();
+  Locks::mutator_lock_->AssertNotHeld(self);
+  if (!cleared_references_.IsEmpty()) {
+    // When a runtime isn't started there are no reference queues to care about so ignore.
+    if (LIKELY(Runtime::Current()->IsStarted())) {
+      ScopedObjectAccess soa(self);
+      ScopedLocalRef<jobject> arg(self->GetJniEnv(),
+                                  soa.AddLocalReference<jobject>(cleared_references_.GetList()));
+      jvalue args[1];
+      args[0].l = arg.get();
+      InvokeWithJValues(soa, nullptr, WellKnownClasses::java_lang_ref_ReferenceQueue_add, args);
+    }
+    cleared_references_.Clear();
+  }
+}
+
+}  // namespace gc
+}  // namespace art
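
For readers following the new GetReferent() slow path above: the slow path is enabled with mutators suspended, a mutator that then asks for a referent blocks on condition_, and ProcessReferences() broadcasts once it disables the slow path again. The standalone sketch below models only that blocking protocol, using std::mutex/std::condition_variable in place of the runtime's Mutex/ConditionVariable and omitting the is-marked re-checks; all names are illustrative.

    #include <condition_variable>
    #include <iostream>
    #include <mutex>
    #include <thread>

    class SlowPathGate {
     public:
      void Enable() {  // analogous to EnableSlowPath(), mutators suspended
        std::lock_guard<std::mutex> lg(lock_);
        slow_path_enabled_ = true;
      }
      void Disable() {  // analogous to DisableSlowPath() at the end of processing
        {
          std::lock_guard<std::mutex> lg(lock_);
          slow_path_enabled_ = false;
        }
        condition_.notify_all();
      }
      int Read(int value) {  // stands in for decoding a referent
        std::unique_lock<std::mutex> lk(lock_);
        condition_.wait(lk, [this] { return !slow_path_enabled_; });
        return value;
      }
     private:
      std::mutex lock_;
      std::condition_variable condition_;
      bool slow_path_enabled_ = false;
    };

    int main() {
      SlowPathGate gate;
      gate.Enable();
      std::thread reader([&] { std::cout << gate.Read(7) << "\n"; });  // blocks
      gate.Disable();  // releases the reader
      reader.join();
      return 0;
    }
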
diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h
new file mode 100644
index 0000000..f082a9e
--- /dev/null
+++ b/runtime/gc/reference_processor.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_REFERENCE_PROCESSOR_H_
+#define ART_RUNTIME_GC_REFERENCE_PROCESSOR_H_
+
+#include "base/mutex.h"
+#include "globals.h"
+#include "jni.h"
+#include "object_callbacks.h"
+#include "reference_queue.h"
+
+namespace art {
+
+class TimingLogger;
+
+namespace mirror {
+class Object;
+class Reference;
+}  // namespace mirror
+
+namespace gc {
+
+class Heap;
+
+// Used to process java.lang.References concurrently or paused.
+class ReferenceProcessor {
+ public:
+  explicit ReferenceProcessor();
+  static mirror::Object* PreserveSoftReferenceCallback(mirror::Object* obj, void* arg);
+  void ProcessReferences(bool concurrent, TimingLogger* timings, bool clear_soft_references,
+                         IsMarkedCallback* is_marked_callback,
+                         MarkObjectCallback* mark_object_callback,
+                         ProcessMarkStackCallback* process_mark_stack_callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      LOCKS_EXCLUDED(lock_);
+  // Only allow setting this with mutators suspended so that we can avoid using a lock in the
+  // GetReferent fast path as an optimization.
+  void EnableSlowPath() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Decode the referent, may block if references are being processed.
+  mirror::Object* GetReferent(Thread* self, mirror::Reference* reference)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
+  void EnqueueClearedReferences() LOCKS_EXCLUDED(Locks::mutator_lock_);
+  void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref,
+                              IsMarkedCallback is_marked_callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+ private:
+  class ProcessReferencesArgs {
+   public:
+    ProcessReferencesArgs(IsMarkedCallback* is_marked_callback,
+                          MarkObjectCallback* mark_callback, void* arg)
+        : is_marked_callback_(is_marked_callback), mark_callback_(mark_callback), arg_(arg) {
+    }
+
+    // The is-marked callback is null when the args aren't set up.
+    IsMarkedCallback* is_marked_callback_;
+    MarkObjectCallback* mark_callback_;
+    void* arg_;
+  };
+  // Called by ProcessReferences.
+  void DisableSlowPath(Thread* self) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  // If we are preserving references, some dead objects may become live again; we use start and
+  // stop preserving to block mutators calling GetReferent from getting access to these
+  // referents.
+  void StartPreservingReferences(Thread* self) LOCKS_EXCLUDED(lock_);
+  void StopPreservingReferences(Thread* self) LOCKS_EXCLUDED(lock_);
+  // Process args, used by GetReferent to return referents which are already marked.
+  ProcessReferencesArgs process_references_args_ GUARDED_BY(lock_);
+  // Boolean for whether or not we need to take the slow path in GetReferent.
+  volatile bool slow_path_enabled_;
+  // Boolean for whether or not we are preserving references (either soft references or finalizers).
+  // If this is true, then we cannot return a referent (see comment in GetReferent).
+  bool preserving_references_ GUARDED_BY(lock_);
+  // Lock that guards the reference processing.
+  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  // Condition that threads wait on if they attempt to get the referent of a reference while
+  // processing is in progress.
+  ConditionVariable condition_ GUARDED_BY(lock_);
+  // Reference queues used by the GC.
+  ReferenceQueue soft_reference_queue_;
+  ReferenceQueue weak_reference_queue_;
+  ReferenceQueue finalizer_reference_queue_;
+  ReferenceQueue phantom_reference_queue_;
+  ReferenceQueue cleared_references_;
+};
+
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_REFERENCE_PROCESSOR_H_
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index aee7891..caacef5 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -109,7 +109,7 @@
                                           void* arg) {
   while (!IsEmpty()) {
     mirror::Reference* ref = DequeuePendingReference();
-    mirror::Object* referent = ref->GetReferent();
+    mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
     if (referent != nullptr) {
       mirror::Object* forward_address = preserve_callback(referent, arg);
       if (forward_address == nullptr) {
@@ -131,17 +131,17 @@
 }
 
 void ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue& cleared_references,
-                                                IsMarkedCallback is_marked_callback,
-                                                MarkObjectCallback recursive_mark_callback,
+                                                IsMarkedCallback* is_marked_callback,
+                                                MarkObjectCallback* mark_object_callback,
                                                 void* arg) {
   while (!IsEmpty()) {
     mirror::FinalizerReference* ref = DequeuePendingReference()->AsFinalizerReference();
-    mirror::Object* referent = ref->GetReferent();
+    mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
     if (referent != nullptr) {
       mirror::Object* forward_address = is_marked_callback(referent, arg);
       // If the referent isn't marked, mark it and update the
       if (forward_address == nullptr) {
-        forward_address = recursive_mark_callback(referent, arg);
+        forward_address = mark_object_callback(referent, arg);
        // If the referent is non-null, the reference must be queuable.
         DCHECK(ref->IsEnqueuable());
         // Move the updated referent to the zombie field.
@@ -160,11 +160,11 @@
   }
 }
 
-void ReferenceQueue::PreserveSomeSoftReferences(IsMarkedCallback preserve_callback, void* arg) {
+void ReferenceQueue::PreserveSomeSoftReferences(IsMarkedCallback* preserve_callback, void* arg) {
   ReferenceQueue cleared;
   while (!IsEmpty()) {
     mirror::Reference* ref = DequeuePendingReference();
-    mirror::Object* referent = ref->GetReferent();
+    mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
     if (referent != nullptr) {
       mirror::Object* forward_address = preserve_callback(referent, arg);
       if (forward_address == nullptr) {
@@ -180,4 +180,3 @@
 
 }  // namespace gc
 }  // namespace art
-
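
One note on the IsMarkedCallback is_marked_callback → IsMarkedCallback* is_marked_callback changes above: assuming IsMarkedCallback is a function-type typedef (as the unchanged call sites suggest), the two spellings accept exactly the same arguments, because a parameter declared with function type adjusts to pointer-to-function; the patch just spells the pointer explicitly and consistently with the header. A tiny hypothetical sketch of that equivalence:

    #include <iostream>

    // Hypothetical callback typedef mirroring the shape used above: takes an
    // object and a cookie, returns the (possibly forwarded) object or null.
    typedef int* IsMarkedCallback(int* obj, void* arg);

    int* Identity(int* obj, void* /* arg */) { return obj; }

    // Function-type parameter adjusts to pointer-to-function, so these two
    // declarations are interchangeable for callers.
    void ProcessByType(IsMarkedCallback is_marked, void* arg) {
      int v = 1;
      std::cout << (is_marked(&v, arg) != nullptr) << "\n";
    }

    void ProcessByPointer(IsMarkedCallback* is_marked, void* arg) {
      int v = 2;
      std::cout << (is_marked(&v, arg) != nullptr) << "\n";
    }

    int main() {
      ProcessByType(Identity, nullptr);
      ProcessByPointer(Identity, nullptr);
      return 0;
    }
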
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index 8d392ba..4f223e2 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -59,8 +59,8 @@
   // Enqueues finalizer references with white referents.  White referents are blackened, moved to the
   // zombie field, and the referent field is cleared.
   void EnqueueFinalizerReferences(ReferenceQueue& cleared_references,
-                                  IsMarkedCallback is_marked_callback,
-                                  MarkObjectCallback recursive_mark_callback, void* arg)
+                                  IsMarkedCallback* is_marked_callback,
+                                  MarkObjectCallback* mark_object_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Walks the reference list marking any references subject to the reference clearing policy.
   // References with a black referent are removed from the list.  References with white referents
@@ -69,7 +69,8 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Unlink the reference list, clearing reference objects with white referents.  Cleared references
   // registered to a reference queue are scheduled for appending by the heap worker thread.
-  void ClearWhiteReferences(ReferenceQueue& cleared_references, IsMarkedCallback is_marked_callback,
+  void ClearWhiteReferences(ReferenceQueue& cleared_references,
+                            IsMarkedCallback* is_marked_callback,
                             void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void Dump(std::ostream& os) const
diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h
index 497a61f..71c295e 100644
--- a/runtime/gc/space/bump_pointer_space-inl.h
+++ b/runtime/gc/space/bump_pointer_space-inl.h
@@ -48,8 +48,8 @@
   end_ += num_bytes;
   *bytes_allocated = num_bytes;
   // Use the CAS free versions as an optimization.
-  objects_allocated_ = objects_allocated_ + 1;
-  bytes_allocated_ = bytes_allocated_ + num_bytes;
+  objects_allocated_.StoreRelaxed(objects_allocated_.LoadRelaxed() + 1);
+  bytes_allocated_.StoreRelaxed(bytes_allocated_.LoadRelaxed() + num_bytes);
   if (UNLIKELY(usable_size != nullptr)) {
     *usable_size = num_bytes;
   }
@@ -76,8 +76,8 @@
 inline mirror::Object* BumpPointerSpace::AllocNonvirtual(size_t num_bytes) {
   mirror::Object* ret = AllocNonvirtualWithoutAccounting(num_bytes);
   if (ret != nullptr) {
-    objects_allocated_.FetchAndAdd(1);
-    bytes_allocated_.FetchAndAdd(num_bytes);
+    objects_allocated_.FetchAndAddSequentiallyConsistent(1);
+    bytes_allocated_.FetchAndAddSequentiallyConsistent(num_bytes);
   }
   return ret;
 }
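
The Atomic<> changes above make the memory ordering explicit rather than implied: the path commented as the "CAS free" optimization keeps relaxed loads and stores (presumably because it runs with exclusive access), while the shared counters use a sequentially consistent fetch-and-add. A rough std::atomic equivalent of that split, for illustration only:

    #include <atomic>
    #include <cstdint>
    #include <iostream>

    int main() {
      // Counters analogous to objects_allocated_ / bytes_allocated_.
      std::atomic<uint64_t> objects_allocated{0};
      std::atomic<uint64_t> bytes_allocated{0};

      // Exclusive fast path: plain relaxed load + store, no read-modify-write
      // (mirrors LoadRelaxed()/StoreRelaxed()).
      objects_allocated.store(objects_allocated.load(std::memory_order_relaxed) + 1,
                              std::memory_order_relaxed);
      bytes_allocated.store(bytes_allocated.load(std::memory_order_relaxed) + 64,
                            std::memory_order_relaxed);

      // Shared path: atomic read-modify-write with the default sequentially
      // consistent ordering (mirrors FetchAndAddSequentiallyConsistent()).
      objects_allocated.fetch_add(1);
      bytes_allocated.fetch_add(64);

      std::cout << objects_allocated.load() << " objects, "
                << bytes_allocated.load() << " bytes\n";
      return 0;
    }
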
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index 90ffe59..fd0a92d 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -28,7 +28,7 @@
                                            byte* requested_begin) {
   capacity = RoundUp(capacity, kPageSize);
   std::string error_msg;
-  UniquePtr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), requested_begin, capacity,
+  std::unique_ptr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), requested_begin, capacity,
                                                  PROT_READ | PROT_WRITE, true, &error_msg));
   if (mem_map.get() == nullptr) {
     LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
@@ -68,8 +68,8 @@
   // Reset the end of the space back to the beginning, we move the end forward as we allocate
   // objects.
   SetEnd(Begin());
-  objects_allocated_ = 0;
-  bytes_allocated_ = 0;
+  objects_allocated_.StoreRelaxed(0);
+  bytes_allocated_.StoreRelaxed(0);
   growth_end_ = Limit();
   {
     MutexLock mu(Thread::Current(), block_lock_);
@@ -204,7 +204,7 @@
 
 uint64_t BumpPointerSpace::GetBytesAllocated() {
   // Start out with the pre-determined amount (blocks which are not being allocated into).
-  uint64_t total = static_cast<uint64_t>(bytes_allocated_.Load());
+  uint64_t total = static_cast<uint64_t>(bytes_allocated_.LoadRelaxed());
   Thread* self = Thread::Current();
   MutexLock mu(self, *Locks::runtime_shutdown_lock_);
   MutexLock mu2(self, *Locks::thread_list_lock_);
@@ -222,7 +222,7 @@
 
 uint64_t BumpPointerSpace::GetObjectsAllocated() {
   // Start out with the pre-determined amount (blocks which are not being allocated into).
-  uint64_t total = static_cast<uint64_t>(objects_allocated_.Load());
+  uint64_t total = static_cast<uint64_t>(objects_allocated_.LoadRelaxed());
   Thread* self = Thread::Current();
   MutexLock mu(self, *Locks::runtime_shutdown_lock_);
   MutexLock mu2(self, *Locks::thread_list_lock_);
@@ -239,8 +239,8 @@
 }
 
 void BumpPointerSpace::RevokeThreadLocalBuffersLocked(Thread* thread) {
-  objects_allocated_.FetchAndAdd(thread->GetThreadLocalObjectsAllocated());
-  bytes_allocated_.FetchAndAdd(thread->GetThreadLocalBytesAllocated());
+  objects_allocated_.FetchAndAddSequentiallyConsistent(thread->GetThreadLocalObjectsAllocated());
+  bytes_allocated_.FetchAndAddSequentiallyConsistent(thread->GetThreadLocalBytesAllocated());
   thread->SetTlab(nullptr, nullptr);
 }
 
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 3de1ba4..45fee14 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -33,15 +33,16 @@
 
 Atomic<uint32_t> ImageSpace::bitmap_index_(0);
 
-ImageSpace::ImageSpace(const std::string& name, MemMap* mem_map,
-                       accounting::ContinuousSpaceBitmap* live_bitmap)
-    : MemMapSpace(name, mem_map, mem_map->Begin(), mem_map->End(), mem_map->End(),
-                  kGcRetentionPolicyNeverCollect) {
+ImageSpace::ImageSpace(const std::string& image_filename, const char* image_location,
+                       MemMap* mem_map, accounting::ContinuousSpaceBitmap* live_bitmap)
+    : MemMapSpace(image_filename, mem_map, mem_map->Begin(), mem_map->End(), mem_map->End(),
+                  kGcRetentionPolicyNeverCollect),
+      image_location_(image_location) {
   DCHECK(live_bitmap != nullptr);
   live_bitmap_.reset(live_bitmap);
 }
 
-static bool GenerateImage(const std::string& image_file_name, std::string* error_msg) {
+static bool GenerateImage(const std::string& image_filename, std::string* error_msg) {
   const std::string boot_class_path_string(Runtime::Current()->GetBootClassPathString());
   std::vector<std::string> boot_class_path;
   Split(boot_class_path_string, ':', boot_class_path);
@@ -57,7 +58,7 @@
   arg_vector.push_back(dex2oat);
 
   std::string image_option_string("--image=");
-  image_option_string += image_file_name;
+  image_option_string += image_filename;
   arg_vector.push_back(image_option_string);
 
   arg_vector.push_back("--runtime-arg");
@@ -72,7 +73,7 @@
   }
 
   std::string oat_file_option_string("--oat-file=");
-  oat_file_option_string += image_file_name;
+  oat_file_option_string += image_filename;
   oat_file_option_string.erase(oat_file_option_string.size() - 3);
   oat_file_option_string += "oat";
   arg_vector.push_back(oat_file_option_string);
@@ -98,37 +99,78 @@
   return Exec(arg_vector, error_msg);
 }
 
-ImageSpace* ImageSpace::Create(const char* original_image_file_name,
-                               const InstructionSet image_isa) {
-  if (OS::FileExists(original_image_file_name)) {
-    // If the /system file exists, it should be up-to-date, don't try to generate
-    std::string error_msg;
-    ImageSpace* space = ImageSpace::Init(original_image_file_name, false, &error_msg);
-    if (space == nullptr) {
-      LOG(FATAL) << "Failed to load image '" << original_image_file_name << "': " << error_msg;
-    }
-    return space;
+bool ImageSpace::FindImageFilename(const char* image_location,
+                                   const InstructionSet image_isa,
+                                   std::string* image_filename,
+                                   bool *is_system) {
+  // image_location = /system/framework/boot.art
+  // system_image_location = /system/framework/<image_isa>/boot.art
+  std::string system_image_filename(GetSystemImageFilename(image_location, image_isa));
+  if (OS::FileExists(system_image_filename.c_str())) {
+    *image_filename = system_image_filename;
+    *is_system = true;
+    return true;
   }
-  // If the /system file didn't exist, we need to use one from the dalvik-cache.
-  // If the cache file exists, try to open, but if it fails, regenerate.
-  // If it does not exist, generate.
+
   const std::string dalvik_cache = GetDalvikCacheOrDie(GetInstructionSetString(image_isa));
-  std::string image_file_name(GetDalvikCacheFilenameOrDie(original_image_file_name,
-                                                          dalvik_cache.c_str()));
+
+  // Always set output location even if it does not exist,
+  // so that the caller knows where to create the image.
+  //
+  // image_location = /system/framework/boot.art
+  // *image_filename = /data/dalvik-cache/<image_isa>/boot.art
+  *image_filename = GetDalvikCacheFilenameOrDie(image_location, dalvik_cache.c_str());
+  *is_system = false;
+  return OS::FileExists(image_filename->c_str());
+}
+
+ImageHeader* ImageSpace::ReadImageHeaderOrDie(const char* image_location,
+                                              const InstructionSet image_isa) {
+  std::string image_filename;
+  bool is_system = false;
+  if (FindImageFilename(image_location, image_isa, &image_filename, &is_system)) {
+    std::unique_ptr<File> image_file(OS::OpenFileForReading(image_filename.c_str()));
+    std::unique_ptr<ImageHeader> image_header(new ImageHeader);
+    const bool success = image_file->ReadFully(image_header.get(), sizeof(ImageHeader));
+    if (!success || !image_header->IsValid()) {
+      LOG(FATAL) << "Invalid Image header for: " << image_filename;
+      return nullptr;
+    }
+
+    return image_header.release();
+  }
+
+  LOG(FATAL) << "Unable to find image file for: " << image_location;
+  return nullptr;
+}
+
+ImageSpace* ImageSpace::Create(const char* image_location,
+                               const InstructionSet image_isa) {
+  std::string image_filename;
   std::string error_msg;
-  if (OS::FileExists(image_file_name.c_str())) {
-    space::ImageSpace* image_space = ImageSpace::Init(image_file_name.c_str(), true, &error_msg);
-    if (image_space != nullptr) {
-      return image_space;
+  bool is_system = false;
+  if (FindImageFilename(image_location, image_isa, &image_filename, &is_system)) {
+    ImageSpace* space = ImageSpace::Init(image_filename.c_str(), image_location, !is_system,
+                                         &error_msg);
+    if (space != nullptr) {
+      return space;
+    }
+
+    // If the /system file exists, it should be up-to-date; don't try to generate it.
+    // If it's not the /system file, log a warning and fall through to GenerateImage.
+    if (is_system) {
+      LOG(FATAL) << "Failed to load image '" << image_filename << "': " << error_msg;
+      return nullptr;
     } else {
       LOG(WARNING) << error_msg;
     }
   }
-  CHECK(GenerateImage(image_file_name, &error_msg))
-      << "Failed to generate image '" << image_file_name << "': " << error_msg;
-  ImageSpace* space = ImageSpace::Init(image_file_name.c_str(), true, &error_msg);
+
+  CHECK(GenerateImage(image_filename, &error_msg))
+      << "Failed to generate image '" << image_filename << "': " << error_msg;
+  ImageSpace* space = ImageSpace::Init(image_filename.c_str(), image_location, true, &error_msg);
   if (space == nullptr) {
-    LOG(FATAL) << "Failed to load image '" << original_image_file_name << "': " << error_msg;
+    LOG(FATAL) << "Failed to load image '" << image_filename << "': " << error_msg;
   }
   return space;
 }
@@ -147,37 +189,38 @@
   }
 }
 
-ImageSpace* ImageSpace::Init(const char* image_file_name, bool validate_oat_file,
-                             std::string* error_msg) {
-  CHECK(image_file_name != nullptr);
+ImageSpace* ImageSpace::Init(const char* image_filename, const char* image_location,
+                             bool validate_oat_file, std::string* error_msg) {
+  CHECK(image_filename != nullptr);
+  CHECK(image_location != nullptr);
 
   uint64_t start_time = 0;
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     start_time = NanoTime();
-    LOG(INFO) << "ImageSpace::Init entering image_file_name=" << image_file_name;
+    LOG(INFO) << "ImageSpace::Init entering image_filename=" << image_filename;
   }
 
-  UniquePtr<File> file(OS::OpenFileForReading(image_file_name));
+  std::unique_ptr<File> file(OS::OpenFileForReading(image_filename));
   if (file.get() == NULL) {
-    *error_msg = StringPrintf("Failed to open '%s'", image_file_name);
+    *error_msg = StringPrintf("Failed to open '%s'", image_filename);
     return nullptr;
   }
   ImageHeader image_header;
   bool success = file->ReadFully(&image_header, sizeof(image_header));
   if (!success || !image_header.IsValid()) {
-    *error_msg = StringPrintf("Invalid image header in '%s'", image_file_name);
+    *error_msg = StringPrintf("Invalid image header in '%s'", image_filename);
     return nullptr;
   }
 
   // Note: The image header is part of the image due to mmap page alignment required of offset.
-  UniquePtr<MemMap> map(MemMap::MapFileAtAddress(image_header.GetImageBegin(),
+  std::unique_ptr<MemMap> map(MemMap::MapFileAtAddress(image_header.GetImageBegin(),
                                                  image_header.GetImageSize(),
                                                  PROT_READ | PROT_WRITE,
                                                  MAP_PRIVATE,
                                                  file->Fd(),
                                                  0,
                                                  false,
-                                                 image_file_name,
+                                                 image_filename,
                                                  error_msg));
   if (map.get() == NULL) {
     DCHECK(!error_msg->empty());
@@ -186,20 +229,20 @@
   CHECK_EQ(image_header.GetImageBegin(), map->Begin());
   DCHECK_EQ(0, memcmp(&image_header, map->Begin(), sizeof(ImageHeader)));
 
-  UniquePtr<MemMap> image_map(MemMap::MapFileAtAddress(nullptr, image_header.GetImageBitmapSize(),
+  std::unique_ptr<MemMap> image_map(MemMap::MapFileAtAddress(nullptr, image_header.GetImageBitmapSize(),
                                                        PROT_READ, MAP_PRIVATE,
                                                        file->Fd(), image_header.GetBitmapOffset(),
                                                        false,
-                                                       image_file_name,
+                                                       image_filename,
                                                        error_msg));
   if (image_map.get() == nullptr) {
     *error_msg = StringPrintf("Failed to map image bitmap: %s", error_msg->c_str());
     return nullptr;
   }
-  uint32_t bitmap_index = bitmap_index_.FetchAndAdd(1);
-  std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u", image_file_name,
+  uint32_t bitmap_index = bitmap_index_.FetchAndAddSequentiallyConsistent(1);
+  std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u", image_filename,
                                        bitmap_index));
-  UniquePtr<accounting::ContinuousSpaceBitmap> bitmap(
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap(
       accounting::ContinuousSpaceBitmap::CreateFromMemMap(bitmap_name, image_map.release(),
                                                           reinterpret_cast<byte*>(map->Begin()),
                                                           map->Size()));
@@ -208,7 +251,29 @@
     return nullptr;
   }
 
+  std::unique_ptr<ImageSpace> space(new ImageSpace(image_filename, image_location,
+                                             map.release(), bitmap.release()));
+
+  // VerifyImageAllocations() will be called later in Runtime::Init()
+  // as some class roots like ArtMethod::java_lang_reflect_ArtMethod_
+  // and ArtField::java_lang_reflect_ArtField_, which are used from
+  // Object::SizeOf() which VerifyImageAllocations() calls, are not
+  // set yet at this point.
+
+  space->oat_file_.reset(space->OpenOatFile(image_filename, error_msg));
+  if (space->oat_file_.get() == nullptr) {
+    DCHECK(!error_msg->empty());
+    return nullptr;
+  }
+
+  if (validate_oat_file && !space->ValidateOatFile(error_msg)) {
+    DCHECK(!error_msg->empty());
+    return nullptr;
+  }
+
   Runtime* runtime = Runtime::Current();
+  runtime->SetInstructionSet(space->oat_file_->GetOatHeader().GetInstructionSet());
+
   mirror::Object* resolution_method = image_header.GetImageRoot(ImageHeader::kResolutionMethod);
   runtime->SetResolutionMethod(down_cast<mirror::ArtMethod*>(resolution_method));
   mirror::Object* imt_conflict_method = image_header.GetImageRoot(ImageHeader::kImtConflictMethod);
@@ -223,22 +288,6 @@
   callee_save_method = image_header.GetImageRoot(ImageHeader::kRefsAndArgsSaveMethod);
   runtime->SetCalleeSaveMethod(down_cast<mirror::ArtMethod*>(callee_save_method), Runtime::kRefsAndArgs);
 
-  UniquePtr<ImageSpace> space(new ImageSpace(image_file_name, map.release(), bitmap.release()));
-  if (kIsDebugBuild) {
-    space->VerifyImageAllocations();
-  }
-
-  space->oat_file_.reset(space->OpenOatFile(image_file_name, error_msg));
-  if (space->oat_file_.get() == nullptr) {
-    DCHECK(!error_msg->empty());
-    return nullptr;
-  }
-
-  if (validate_oat_file && !space->ValidateOatFile(error_msg)) {
-    DCHECK(!error_msg->empty());
-    return nullptr;
-  }
-
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "ImageSpace::Init exiting (" << PrettyDuration(NanoTime() - start_time)
              << ") " << *space.get();
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index 1652ec9..372db3a 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -43,9 +43,14 @@
   // creation of the alloc space. The ReleaseOatFile will later be
   // used to transfer ownership of the OatFile to the ClassLinker when
   // it is initialized.
-  static ImageSpace* Create(const char* image, const InstructionSet image_isa)
+  static ImageSpace* Create(const char* image, InstructionSet image_isa)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Reads the image header from the specified image location for the
+  // instruction set image_isa.
+  static ImageHeader* ReadImageHeaderOrDie(const char* image_location,
+                                           InstructionSet image_isa);
+
   // Releases the OatFile from the ImageSpace so it can be transfer to
   // the caller, presumably the ClassLinker.
   OatFile* ReleaseOatFile()
@@ -58,10 +63,18 @@
     return *reinterpret_cast<ImageHeader*>(Begin());
   }
 
+  // Actual filename where image was loaded from.
+  // For example: /data/dalvik-cache/arm/system@framework@boot.art
   const std::string GetImageFilename() const {
     return GetName();
   }
 
+  // Symbolic location for image.
+  // For example: /system/framework/boot.art
+  const std::string GetImageLocation() const {
+    return image_location_;
+  }
+
   accounting::ContinuousSpaceBitmap* GetLiveBitmap() const OVERRIDE {
     return live_bitmap_.get();
   }
@@ -90,9 +103,21 @@
   // image's OatFile is up-to-date relative to its DexFile
   // inputs. Otherwise (for /data), validate the inputs and generate
   // the OatFile in /data/dalvik-cache if necessary.
-  static ImageSpace* Init(const char* image, bool validate_oat_file, std::string* error_msg)
+  static ImageSpace* Init(const char* image_filename, const char* image_location,
+                          bool validate_oat_file, std::string* error_msg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Returns the filename of the image corresponding to
+  // requested image_location, or the filename where a new image
+  // should be written if one doesn't exist. Looks for a generated
+  // image in the specified location and then in the dalvik-cache.
+  //
+  // Returns true if an image was found, false otherwise.
+  static bool FindImageFilename(const char* image_location,
+                                InstructionSet image_isa,
+                                std::string* image_filename,
+                                bool* is_system);
+
   OatFile* OpenOatFile(const char* image, std::string* error_msg) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -103,15 +128,17 @@
 
   static Atomic<uint32_t> bitmap_index_;
 
-  UniquePtr<accounting::ContinuousSpaceBitmap> live_bitmap_;
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> live_bitmap_;
 
-  ImageSpace(const std::string& name, MemMap* mem_map,
-             accounting::ContinuousSpaceBitmap* live_bitmap);
+  ImageSpace(const std::string& name, const char* image_location,
+             MemMap* mem_map, accounting::ContinuousSpaceBitmap* live_bitmap);
 
   // The OatFile associated with the image during early startup to
   // reserve space contiguous to the image. It is later released to
   // the ClassLinker during its initialization.
-  UniquePtr<OatFile> oat_file_;
+  std::unique_ptr<OatFile> oat_file_;
+
+  const std::string image_location_;
 
   DISALLOW_COPY_AND_ASSIGN(ImageSpace);
 };
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index dc2769e..e63cc39 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -16,11 +16,12 @@
 
 #include "large_object_space.h"
 
+#include <memory>
+
 #include "gc/accounting/space_bitmap-inl.h"
 #include "base/logging.h"
 #include "base/mutex-inl.h"
 #include "base/stl_util.h"
-#include "UniquePtr.h"
 #include "image.h"
 #include "os.h"
 #include "space-inl.h"
@@ -140,7 +141,8 @@
 size_t LargeObjectMapSpace::Free(Thread* self, mirror::Object* ptr) {
   MutexLock mu(self, lock_);
   MemMaps::iterator found = mem_maps_.find(ptr);
-  CHECK(found != mem_maps_.end()) << "Attempted to free large object which was not live";
+  CHECK(found != mem_maps_.end()) << "Attempted to free large object " << ptr
+      << " which was not live";
   DCHECK_GE(num_bytes_allocated_, found->second->Size());
   size_t allocation_size = found->second->Size();
   num_bytes_allocated_ -= allocation_size;
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index 0daefba..a84b43a 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -133,9 +133,9 @@
   // Used to ensure mutual exclusion when the allocation spaces data structures are being modified.
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::vector<mirror::Object*,
-      accounting::GcAllocator<mirror::Object*> > large_objects_ GUARDED_BY(lock_);
+      accounting::GcAllocator<mirror::Object*>> large_objects_ GUARDED_BY(lock_);
   typedef SafeMap<mirror::Object*, MemMap*, std::less<mirror::Object*>,
-      accounting::GcAllocator<std::pair<mirror::Object*, MemMap*> > > MemMaps;
+      accounting::GcAllocator<std::pair<mirror::Object*, MemMap*>>> MemMaps;
   MemMaps mem_maps_ GUARDED_BY(lock_);
 };
 
@@ -256,11 +256,11 @@
   AllocationHeader* GetAllocationHeader(const mirror::Object* obj);
 
   typedef std::set<AllocationHeader*, AllocationHeader::SortByPrevFree,
-                   accounting::GcAllocator<AllocationHeader*> > FreeBlocks;
+                   accounting::GcAllocator<AllocationHeader*>> FreeBlocks;
 
   // There is no footer for any allocations at the end of the space, so we keep track of how much
   // free space there is at the end manually.
-  UniquePtr<MemMap> mem_map_;
+  std::unique_ptr<MemMap> mem_map_;
   Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   size_t free_end_ GUARDED_BY(lock_);
   FreeBlocks free_blocks_ GUARDED_BY(lock_);
diff --git a/runtime/gc/space/large_object_space_test.cc b/runtime/gc/space/large_object_space_test.cc
index 8a6636d..23c67ff 100644
--- a/runtime/gc/space/large_object_space_test.cc
+++ b/runtime/gc/space/large_object_space_test.cc
@@ -39,7 +39,7 @@
 
     static const size_t num_allocations = 64;
     static const size_t max_allocation_size = 0x100000;
-    std::vector<std::pair<mirror::Object*, size_t> > requests;
+    std::vector<std::pair<mirror::Object*, size_t>> requests;
 
     for (size_t phase = 0; phase < 2; ++phase) {
       while (requests.size() < num_allocations) {
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index 7493c19..e710409 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -24,7 +24,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
-#include "sirt_ref-inl.h"
+#include "handle_scope-inl.h"
 #include "thread.h"
 #include "thread_list.h"
 #include "utils.h"
@@ -188,7 +188,7 @@
   VLOG(heap) << "Capacity " << PrettySize(capacity);
   // Remap the tail.
   std::string error_msg;
-  UniquePtr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
+  std::unique_ptr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
                                                     PROT_READ | PROT_WRITE, &error_msg));
   CHECK(mem_map.get() != nullptr) << error_msg;
   void* allocator = CreateAllocator(end_, starting_size_, initial_size_, capacity, low_memory_mode);
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index dcf5357..8415fa1 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -17,9 +17,9 @@
 #ifndef ART_RUNTIME_GC_SPACE_SPACE_H_
 #define ART_RUNTIME_GC_SPACE_SPACE_H_
 
+#include <memory>
 #include <string>
 
-#include "UniquePtr.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "gc/accounting/space_bitmap.h"
@@ -339,8 +339,8 @@
  protected:
   DiscontinuousSpace(const std::string& name, GcRetentionPolicy gc_retention_policy);
 
-  UniquePtr<accounting::LargeObjectBitmap> live_bitmap_;
-  UniquePtr<accounting::LargeObjectBitmap> mark_bitmap_;
+  std::unique_ptr<accounting::LargeObjectBitmap> live_bitmap_;
+  std::unique_ptr<accounting::LargeObjectBitmap> mark_bitmap_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(DiscontinuousSpace);
@@ -374,7 +374,7 @@
   }
 
   // Underlying storage of the space
-  UniquePtr<MemMap> mem_map_;
+  std::unique_ptr<MemMap> mem_map_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(MemMapSpace);
@@ -419,9 +419,9 @@
   virtual accounting::ContinuousSpaceBitmap::SweepCallback* GetSweepCallback() = 0;
 
  protected:
-  UniquePtr<accounting::ContinuousSpaceBitmap> live_bitmap_;
-  UniquePtr<accounting::ContinuousSpaceBitmap> mark_bitmap_;
-  UniquePtr<accounting::ContinuousSpaceBitmap> temp_bitmap_;
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> live_bitmap_;
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> mark_bitmap_;
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> temp_bitmap_;
 
   ContinuousMemMapAllocSpace(const std::string& name, MemMap* mem_map, byte* begin,
                              byte* end, byte* limit, GcRetentionPolicy gc_retention_policy)
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index 3335e72..a2d4942 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -20,10 +20,10 @@
 #include "zygote_space.h"
 
 #include <stdint.h>
+#include <memory>
 
 #include "common_runtime_test.h"
 #include "globals.h"
-#include "UniquePtr.h"
 #include "mirror/array-inl.h"
 #include "mirror/object-inl.h"
 
@@ -48,7 +48,8 @@
   }
 
   mirror::Class* GetByteArrayClass(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    SirtRef<mirror::ClassLoader> null_loader(self, nullptr);
+    StackHandleScope<1> hs(self);
+    auto null_loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
     if (byte_array_class_ == nullptr) {
       mirror::Class* byte_array_class =
           Runtime::Current()->GetClassLinker()->FindClass(self, "[B", null_loader);
@@ -62,10 +63,11 @@
   mirror::Object* Alloc(space::MallocSpace* alloc_space, Thread* self, size_t bytes,
                         size_t* bytes_allocated, size_t* usable_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    SirtRef<mirror::Class> byte_array_class(self, GetByteArrayClass(self));
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> byte_array_class(hs.NewHandle(GetByteArrayClass(self)));
     mirror::Object* obj = alloc_space->Alloc(self, bytes, bytes_allocated, usable_size);
     if (obj != nullptr) {
-      InstallClass(obj, byte_array_class.get(), bytes);
+      InstallClass(obj, byte_array_class.Get(), bytes);
     }
     return obj;
   }
@@ -73,10 +75,11 @@
   mirror::Object* AllocWithGrowth(space::MallocSpace* alloc_space, Thread* self, size_t bytes,
                                   size_t* bytes_allocated, size_t* usable_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    SirtRef<mirror::Class> byte_array_class(self, GetByteArrayClass(self));
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> byte_array_class(hs.NewHandle(GetByteArrayClass(self)));
     mirror::Object* obj = alloc_space->AllocWithGrowth(self, bytes, bytes_allocated, usable_size);
     if (obj != nullptr) {
-      InstallClass(obj, byte_array_class.get(), bytes);
+      InstallClass(obj, byte_array_class.Get(), bytes);
     }
     return obj;
   }
@@ -126,37 +129,37 @@
 void SpaceTest::InitTestBody(CreateSpaceFn create_space) {
   {
     // Init < max == growth
-    UniquePtr<Space> space(create_space("test", 16 * MB, 32 * MB, 32 * MB, nullptr));
+    std::unique_ptr<Space> space(create_space("test", 16 * MB, 32 * MB, 32 * MB, nullptr));
     EXPECT_TRUE(space.get() != nullptr);
   }
   {
     // Init == max == growth
-    UniquePtr<Space> space(create_space("test", 16 * MB, 16 * MB, 16 * MB, nullptr));
+    std::unique_ptr<Space> space(create_space("test", 16 * MB, 16 * MB, 16 * MB, nullptr));
     EXPECT_TRUE(space.get() != nullptr);
   }
   {
     // Init > max == growth
-    UniquePtr<Space> space(create_space("test", 32 * MB, 16 * MB, 16 * MB, nullptr));
+    std::unique_ptr<Space> space(create_space("test", 32 * MB, 16 * MB, 16 * MB, nullptr));
     EXPECT_TRUE(space.get() == nullptr);
   }
   {
     // Growth == init < max
-    UniquePtr<Space> space(create_space("test", 16 * MB, 16 * MB, 32 * MB, nullptr));
+    std::unique_ptr<Space> space(create_space("test", 16 * MB, 16 * MB, 32 * MB, nullptr));
     EXPECT_TRUE(space.get() != nullptr);
   }
   {
     // Growth < init < max
-    UniquePtr<Space> space(create_space("test", 16 * MB, 8 * MB, 32 * MB, nullptr));
+    std::unique_ptr<Space> space(create_space("test", 16 * MB, 8 * MB, 32 * MB, nullptr));
     EXPECT_TRUE(space.get() == nullptr);
   }
   {
     // Init < growth < max
-    UniquePtr<Space> space(create_space("test", 8 * MB, 16 * MB, 32 * MB, nullptr));
+    std::unique_ptr<Space> space(create_space("test", 8 * MB, 16 * MB, 32 * MB, nullptr));
     EXPECT_TRUE(space.get() != nullptr);
   }
   {
     // Init < max < growth
-    UniquePtr<Space> space(create_space("test", 8 * MB, 32 * MB, 16 * MB, nullptr));
+    std::unique_ptr<Space> space(create_space("test", 8 * MB, 32 * MB, 16 * MB, nullptr));
     EXPECT_TRUE(space.get() == nullptr);
   }
 }
@@ -177,9 +180,10 @@
 
   // Succeeds, fits without adjusting the footprint limit.
   size_t ptr1_bytes_allocated, ptr1_usable_size;
-  SirtRef<mirror::Object> ptr1(self, Alloc(space, self, 1 * MB, &ptr1_bytes_allocated,
-                                           &ptr1_usable_size));
-  EXPECT_TRUE(ptr1.get() != nullptr);
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::Object> ptr1(
+      hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size)));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
   EXPECT_LE(1U * MB, ptr1_bytes_allocated);
   EXPECT_LE(1U * MB, ptr1_usable_size);
   EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
@@ -190,9 +194,9 @@
 
   // Succeeds, adjusts the footprint.
   size_t ptr3_bytes_allocated, ptr3_usable_size;
-  SirtRef<mirror::Object> ptr3(self, AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated,
-                                                     &ptr3_usable_size));
-  EXPECT_TRUE(ptr3.get() != nullptr);
+  Handle<mirror::Object> ptr3(
+      hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size)));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
   EXPECT_LE(8U * MB, ptr3_bytes_allocated);
   EXPECT_LE(8U * MB, ptr3_usable_size);
   EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
@@ -206,23 +210,23 @@
   EXPECT_TRUE(ptr5 == nullptr);
 
   // Release some memory.
-  size_t free3 = space->AllocationSize(ptr3.get(), nullptr);
+  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
   EXPECT_EQ(free3, ptr3_bytes_allocated);
-  EXPECT_EQ(free3, space->Free(self, ptr3.reset(nullptr)));
+  EXPECT_EQ(free3, space->Free(self, ptr3.Assign(nullptr)));
   EXPECT_LE(8U * MB, free3);
 
   // Succeeds, now that memory has been freed.
   size_t ptr6_bytes_allocated, ptr6_usable_size;
-  SirtRef<mirror::Object> ptr6(self, AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated,
-                                                     &ptr6_usable_size));
-  EXPECT_TRUE(ptr6.get() != nullptr);
+  Handle<mirror::Object> ptr6(
+      hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size)));
+  EXPECT_TRUE(ptr6.Get() != nullptr);
   EXPECT_LE(9U * MB, ptr6_bytes_allocated);
   EXPECT_LE(9U * MB, ptr6_usable_size);
   EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
 
   // Final clean up.
-  size_t free1 = space->AllocationSize(ptr1.get(), nullptr);
-  space->Free(self, ptr1.reset(nullptr));
+  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
   EXPECT_LE(1U * MB, free1);
 
   // Make sure that the zygote space isn't directly at the start of the space.
@@ -243,8 +247,8 @@
   AddSpace(space, false);
 
   // Succeeds, fits without adjusting the footprint limit.
-  ptr1.reset(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size));
-  EXPECT_TRUE(ptr1.get() != nullptr);
+  ptr1.Assign(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
   EXPECT_LE(1U * MB, ptr1_bytes_allocated);
   EXPECT_LE(1U * MB, ptr1_usable_size);
   EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
@@ -254,16 +258,16 @@
   EXPECT_TRUE(ptr2 == nullptr);
 
   // Succeeds, adjusts the footprint.
-  ptr3.reset(AllocWithGrowth(space, self, 2 * MB, &ptr3_bytes_allocated, &ptr3_usable_size));
-  EXPECT_TRUE(ptr3.get() != nullptr);
+  ptr3.Assign(AllocWithGrowth(space, self, 2 * MB, &ptr3_bytes_allocated, &ptr3_usable_size));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
   EXPECT_LE(2U * MB, ptr3_bytes_allocated);
   EXPECT_LE(2U * MB, ptr3_usable_size);
   EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
-  space->Free(self, ptr3.reset(nullptr));
+  space->Free(self, ptr3.Assign(nullptr));
 
   // Final clean up.
-  free1 = space->AllocationSize(ptr1.get(), nullptr);
-  space->Free(self, ptr1.reset(nullptr));
+  free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
   EXPECT_LE(1U * MB, free1);
 }
 
@@ -279,9 +283,10 @@
 
   // Succeeds, fits without adjusting the footprint limit.
   size_t ptr1_bytes_allocated, ptr1_usable_size;
-  SirtRef<mirror::Object> ptr1(self, Alloc(space, self, 1 * MB, &ptr1_bytes_allocated,
-                                           &ptr1_usable_size));
-  EXPECT_TRUE(ptr1.get() != nullptr);
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::Object> ptr1(
+      hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size)));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
   EXPECT_LE(1U * MB, ptr1_bytes_allocated);
   EXPECT_LE(1U * MB, ptr1_usable_size);
   EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
@@ -292,9 +297,9 @@
 
   // Succeeds, adjusts the footprint.
   size_t ptr3_bytes_allocated, ptr3_usable_size;
-  SirtRef<mirror::Object> ptr3(self, AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated,
-                                                     &ptr3_usable_size));
-  EXPECT_TRUE(ptr3.get() != nullptr);
+  Handle<mirror::Object> ptr3(
+      hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size)));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
   EXPECT_LE(8U * MB, ptr3_bytes_allocated);
   EXPECT_LE(8U * MB, ptr3_usable_size);
   EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
@@ -308,23 +313,23 @@
   EXPECT_TRUE(ptr5 == nullptr);
 
   // Release some memory.
-  size_t free3 = space->AllocationSize(ptr3.get(), nullptr);
+  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
   EXPECT_EQ(free3, ptr3_bytes_allocated);
-  space->Free(self, ptr3.reset(nullptr));
+  space->Free(self, ptr3.Assign(nullptr));
   EXPECT_LE(8U * MB, free3);
 
   // Succeeds, now that memory has been freed.
   size_t ptr6_bytes_allocated, ptr6_usable_size;
-  SirtRef<mirror::Object> ptr6(self, AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated,
-                                                     &ptr6_usable_size));
-  EXPECT_TRUE(ptr6.get() != nullptr);
+  Handle<mirror::Object> ptr6(
+      hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size)));
+  EXPECT_TRUE(ptr6.Get() != nullptr);
   EXPECT_LE(9U * MB, ptr6_bytes_allocated);
   EXPECT_LE(9U * MB, ptr6_usable_size);
   EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
 
   // Final clean up.
-  size_t free1 = space->AllocationSize(ptr1.get(), nullptr);
-  space->Free(self, ptr1.reset(nullptr));
+  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
   EXPECT_LE(1U * MB, free1);
 }
 
@@ -345,8 +350,6 @@
     lots_of_objects[i] = Alloc(space, self, size_of_zero_length_byte_array, &allocation_size,
                                &usable_size);
     EXPECT_TRUE(lots_of_objects[i] != nullptr);
-    SirtRef<mirror::Object> obj(self, lots_of_objects[i]);
-    lots_of_objects[i] = obj.get();
     size_t computed_usable_size;
     EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
     EXPECT_EQ(usable_size, computed_usable_size);
@@ -360,8 +363,6 @@
     size_t allocation_size, usable_size;
     lots_of_objects[i] = AllocWithGrowth(space, self, 1024, &allocation_size, &usable_size);
     EXPECT_TRUE(lots_of_objects[i] != nullptr);
-    SirtRef<mirror::Object> obj(self, lots_of_objects[i]);
-    lots_of_objects[i] = obj.get();
     size_t computed_usable_size;
     EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
     EXPECT_EQ(usable_size, computed_usable_size);
@@ -397,7 +398,7 @@
 
   // Fill the space with lots of small objects up to the growth limit
   size_t max_objects = (growth_limit / (object_size > 0 ? object_size : 8)) + 1;
-  UniquePtr<mirror::Object*[]> lots_of_objects(new mirror::Object*[max_objects]);
+  std::unique_ptr<mirror::Object*[]> lots_of_objects(new mirror::Object*[max_objects]);
   size_t last_object = 0;  // last object for which allocation succeeded
   size_t amount_allocated = 0;  // amount of space allocated
   Thread* self = Thread::Current();
@@ -418,18 +419,19 @@
           alloc_size = size_of_zero_length_byte_array;
         }
       }
-      SirtRef<mirror::Object> object(self, nullptr);
+      StackHandleScope<1> hs(soa.Self());
+      auto object(hs.NewHandle<mirror::Object>(nullptr));
       size_t bytes_allocated = 0;
       if (round <= 1) {
-        object.reset(Alloc(space, self, alloc_size, &bytes_allocated, nullptr));
+        object.Assign(Alloc(space, self, alloc_size, &bytes_allocated, nullptr));
       } else {
-        object.reset(AllocWithGrowth(space, self, alloc_size, &bytes_allocated, nullptr));
+        object.Assign(AllocWithGrowth(space, self, alloc_size, &bytes_allocated, nullptr));
       }
       footprint = space->GetFootprint();
       EXPECT_GE(space->Size(), footprint);  // invariant
-      if (object.get() != nullptr) {  // allocation succeeded
-        lots_of_objects[i] = object.get();
-        size_t allocation_size = space->AllocationSize(object.get(), nullptr);
+      if (object.Get() != nullptr) {  // allocation succeeded
+        lots_of_objects[i] = object.Get();
+        size_t allocation_size = space->AllocationSize(object.Get(), nullptr);
         EXPECT_EQ(bytes_allocated, allocation_size);
         if (object_size > 0) {
           EXPECT_GE(allocation_size, static_cast<size_t>(object_size));
@@ -509,16 +511,17 @@
   space->RevokeAllThreadLocalBuffers();
 
   // All memory was released, try a large allocation to check freed memory is being coalesced
-  SirtRef<mirror::Object> large_object(self, nullptr);
+  StackHandleScope<1> hs(soa.Self());
+  auto large_object(hs.NewHandle<mirror::Object>(nullptr));
   size_t three_quarters_space = (growth_limit / 2) + (growth_limit / 4);
   size_t bytes_allocated = 0;
   if (round <= 1) {
-    large_object.reset(Alloc(space, self, three_quarters_space, &bytes_allocated, nullptr));
+    large_object.Assign(Alloc(space, self, three_quarters_space, &bytes_allocated, nullptr));
   } else {
-    large_object.reset(AllocWithGrowth(space, self, three_quarters_space, &bytes_allocated,
-                                       nullptr));
+    large_object.Assign(AllocWithGrowth(space, self, three_quarters_space, &bytes_allocated,
+                                        nullptr));
   }
-  EXPECT_TRUE(large_object.get() != nullptr);
+  EXPECT_TRUE(large_object.Get() != nullptr);
 
   // Sanity check footprint
   footprint = space->GetFootprint();
@@ -527,7 +530,7 @@
   EXPECT_LE(space->Size(), growth_limit);
 
   // Clean up
-  space->Free(self, large_object.reset(nullptr));
+  space->Free(self, large_object.Assign(nullptr));
 
   // Sanity check footprint
   footprint = space->GetFootprint();
diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc
index 0466413..fb3a12e 100644
--- a/runtime/gc/space/zygote_space.cc
+++ b/runtime/gc/space/zygote_space.cc
@@ -115,7 +115,7 @@
     // Need to mark the card since this will update the mod-union table next GC cycle.
     card_table->MarkCard(ptrs[i]);
   }
-  zygote_space->objects_allocated_.FetchAndSub(num_ptrs);
+  zygote_space->objects_allocated_.FetchAndSubSequentiallyConsistent(num_ptrs);
 }
 
 }  // namespace space
diff --git a/runtime/gc/space/zygote_space.h b/runtime/gc/space/zygote_space.h
index 50fc62b..5d5fe76 100644
--- a/runtime/gc/space/zygote_space.h
+++ b/runtime/gc/space/zygote_space.h
@@ -65,7 +65,7 @@
   }
 
   uint64_t GetObjectsAllocated() {
-    return objects_allocated_;
+    return objects_allocated_.LoadSequentiallyConsistent();
   }
 
   void Clear() OVERRIDE;
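
The two zygote-space hunks above move the objects_allocated_ counter onto explicitly
sequentially-consistent Atomic accessors. A rough std::atomic analogue of the resulting behaviour,
assuming the *SequentiallyConsistent methods correspond to std::memory_order_seq_cst:

    #include <atomic>
    #include <cstdint>

    // Hypothetical stand-in for ZygoteSpace::objects_allocated_.
    std::atomic<uint64_t> objects_allocated{0};

    // Mirrors FetchAndSubSequentiallyConsistent(num_ptrs) in the sweep callback.
    void OnObjectsFreed(uint64_t num_ptrs) {
      objects_allocated.fetch_sub(num_ptrs, std::memory_order_seq_cst);
    }

    // Mirrors LoadSequentiallyConsistent() in GetObjectsAllocated().
    uint64_t LoadObjectsAllocated() {
      return objects_allocated.load(std::memory_order_seq_cst);
    }
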
diff --git a/runtime/globals.h b/runtime/globals.h
index e3c54b8..07fadb9 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -20,6 +20,7 @@
 #include <stddef.h>
 #include <stdint.h>
 #include "read_barrier_c.h"
+#include "read_barrier_option.h"
 
 namespace art {
 
@@ -54,9 +55,8 @@
 // but ARM ELF requires 8.
 static constexpr size_t kArmAlignment = 8;
 
-// ARM64 instruction alignment. AArch64 require code to be 4-byte aligned.
-// AArch64 ELF requires at least 4.
-static constexpr size_t kArm64Alignment = 4;
+// ARM64 instruction alignment. This is the recommended alignment for maximum performance.
+static constexpr size_t kArm64Alignment = 16;
 
 // MIPS instruction alignment.  MIPS processors require code to be 4-byte aligned.
 // TODO: Can this be 4?
diff --git a/runtime/handle.h b/runtime/handle.h
new file mode 100644
index 0000000..b70f651
--- /dev/null
+++ b/runtime/handle.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_HANDLE_H_
+#define ART_RUNTIME_HANDLE_H_
+
+#include "base/casts.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "stack.h"
+
+namespace art {
+
+class Thread;
+
+template<class T>
+class Handle {
+ public:
+  Handle() : reference_(nullptr) {
+  }
+  Handle(const Handle<T>& handle) ALWAYS_INLINE : reference_(handle.reference_) {
+  }
+  Handle<T>& operator=(const Handle<T>& handle) ALWAYS_INLINE {
+    reference_ = handle.reference_;
+    return *this;
+  }
+  explicit Handle(StackReference<T>* reference) ALWAYS_INLINE : reference_(reference) {
+  }
+  T& operator*() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    return *Get();
+  }
+  T* operator->() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    return Get();
+  }
+  T* Get() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    return reference_->AsMirrorPtr();
+  }
+  T* Assign(T* reference) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    T* old = reference_->AsMirrorPtr();
+    reference_->Assign(reference);
+    return old;
+  }
+  jobject ToJObject() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    if (UNLIKELY(reference_->AsMirrorPtr() == nullptr)) {
+      // Special case so that we work with NullHandles.
+      return nullptr;
+    }
+    return reinterpret_cast<jobject>(reference_);
+  }
+
+ protected:
+  StackReference<T>* reference_;
+
+  template<typename S>
+  explicit Handle(StackReference<S>* reference)
+      : reference_(reinterpret_cast<StackReference<T>*>(reference)) {
+  }
+  template<typename S>
+  explicit Handle(const Handle<S>& handle)
+      : reference_(reinterpret_cast<StackReference<T>*>(handle.reference_)) {
+  }
+
+  StackReference<T>* GetReference() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    return reference_;
+  }
+
+ private:
+  friend class BuildGenericJniFrameVisitor;
+  template<class S> friend class Handle;
+  friend class HandleScope;
+  template<class S> friend class HandleWrapper;
+  template<size_t kNumReferences> friend class StackHandleScope;
+};
+
+template<class T>
+class NullHandle : public Handle<T> {
+ public:
+  NullHandle() : Handle<T>(&null_ref_) {
+  }
+
+ private:
+  StackReference<T> null_ref_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_HANDLE_H_
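
handle.h replaces the old SirtRef<T> pattern: a Handle<T> is a thin wrapper around a
StackReference<T> slot, with Get()/Assign() standing in for get()/reset(), and NullHandle<T>
covering callers that have no reference to pass. A minimal sketch of the NullHandle case, modelled
on the FindClass call that appears later in this change (interpreter_common.cc); FindBootClass is a
hypothetical helper:

    // Sketch only: look up a boot-classpath class when the caller has no class loader to hand over.
    mirror::Class* FindBootClass(ClassLinker* class_linker, Thread* self, const char* descriptor)
        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
      return class_linker->FindClass(self, descriptor, NullHandle<mirror::ClassLoader>());
    }
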
diff --git a/runtime/handle_scope-inl.h b/runtime/handle_scope-inl.h
new file mode 100644
index 0000000..634f2be
--- /dev/null
+++ b/runtime/handle_scope-inl.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_HANDLE_SCOPE_INL_H_
+#define ART_RUNTIME_HANDLE_SCOPE_INL_H_
+
+#include "handle_scope-inl.h"
+
+#include "handle.h"
+#include "thread.h"
+
+namespace art {
+
+template<size_t kNumReferences>
+inline StackHandleScope<kNumReferences>::StackHandleScope(Thread* self)
+    : HandleScope(kNumReferences), self_(self), pos_(0) {
+  // TODO: Figure out how to use a compile assert.
+  DCHECK_EQ(OFFSETOF_MEMBER(HandleScope, references_),
+            OFFSETOF_MEMBER(StackHandleScope<1>, references_storage_));
+  for (size_t i = 0; i < kNumReferences; ++i) {
+    SetReference(i, nullptr);
+  }
+  self_->PushHandleScope(this);
+}
+
+template<size_t kNumReferences>
+inline StackHandleScope<kNumReferences>::~StackHandleScope() {
+  HandleScope* top_handle_scope = self_->PopHandleScope();
+  DCHECK_EQ(top_handle_scope, this);
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_HANDLE_SCOPE_INL_H_
diff --git a/runtime/handle_scope.h b/runtime/handle_scope.h
new file mode 100644
index 0000000..f2e059d
--- /dev/null
+++ b/runtime/handle_scope.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_HANDLE_SCOPE_H_
+#define ART_RUNTIME_HANDLE_SCOPE_H_
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "handle.h"
+#include "stack.h"
+#include "utils.h"
+
+namespace art {
+namespace mirror {
+class Object;
+}
+class Thread;
+
+// HandleScopes can be allocated within the bridge frame between managed and native code, backed by
+// stack storage, or manually allocated in native code.
+class HandleScope {
+ public:
+  ~HandleScope() {}
+
+  // Number of references contained within this handle scope.
+  uint32_t NumberOfReferences() const {
+    return number_of_references_;
+  }
+
+  // We have versions with and without explicit pointer size of the following. The first two are
+  // used at runtime, so OFFSETOF_MEMBER computes the right offsets automatically. The last one
+  // takes the pointer size explicitly so that at compile time we can cross-compile correctly.
+
+  // Returns the size of a HandleScope containing num_references handles.
+  static size_t SizeOf(uint32_t num_references) {
+    size_t header_size = OFFSETOF_MEMBER(HandleScope, references_);
+    size_t data_size = sizeof(StackReference<mirror::Object>) * num_references;
+    return header_size + data_size;
+  }
+
+  // Get the size of the handle scope for the number of entries, with padding added for potential alignment.
+  static size_t GetAlignedHandleScopeSize(uint32_t num_references) {
+    size_t handle_scope_size = SizeOf(num_references);
+    return RoundUp(handle_scope_size, 8);
+  }
+
+  // Same as above, but takes an explicit pointer size so the value can be computed when
+  // cross-compiling, with padding added for potential alignment.
+  static size_t GetAlignedHandleScopeSizeTarget(size_t pointer_size, uint32_t num_references) {
+    // Assume that the layout is packed.
+    size_t header_size = pointer_size + sizeof(number_of_references_);
+    // This assumes there is no layout change between 32 and 64b.
+    size_t data_size = sizeof(StackReference<mirror::Object>) * num_references;
+    size_t handle_scope_size = header_size + data_size;
+    return RoundUp(handle_scope_size, 8);
+  }
+
+  // Link to previous HandleScope or null.
+  HandleScope* GetLink() const {
+    return link_;
+  }
+
+  void SetLink(HandleScope* link) {
+    DCHECK_NE(this, link);
+    link_ = link;
+  }
+
+  // Sets the number_of_references_ field for constructing tables out of raw memory. Warning: will
+  // not resize anything.
+  void SetNumberOfReferences(uint32_t num_references) {
+    number_of_references_ = num_references;
+  }
+
+  mirror::Object* GetReference(size_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      ALWAYS_INLINE {
+    DCHECK_LT(i, number_of_references_);
+    return references_[i].AsMirrorPtr();
+  }
+
+  Handle<mirror::Object> GetHandle(size_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      ALWAYS_INLINE {
+    DCHECK_LT(i, number_of_references_);
+    return Handle<mirror::Object>(&references_[i]);
+  }
+
+  void SetReference(size_t i, mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      ALWAYS_INLINE {
+    DCHECK_LT(i, number_of_references_);
+    references_[i].Assign(object);
+  }
+
+  bool Contains(StackReference<mirror::Object>* handle_scope_entry) const {
+    // A HandleScope should always contain something. One created by the
+    // jni_compiler should have a jobject/jclass in it, as a native method is
+    // always passed a this pointer or a class.
+    DCHECK_GT(number_of_references_, 0U);
+    return ((&references_[0] <= handle_scope_entry)
+            && (handle_scope_entry <= (&references_[number_of_references_ - 1])));
+  }
+
+  // Offset of link within HandleScope, used by generated code
+  static size_t LinkOffset(size_t pointer_size) {
+    return 0;
+  }
+
+  // Offset of length within handle scope, used by generated code
+  static size_t NumberOfReferencesOffset(size_t pointer_size) {
+    return pointer_size;
+  }
+
+  // Offset of the first reference within handle scope, used by generated code
+  static size_t ReferencesOffset(size_t pointer_size) {
+    return pointer_size + sizeof(number_of_references_);
+  }
+
+ protected:
+  explicit HandleScope(size_t number_of_references) :
+      link_(nullptr), number_of_references_(number_of_references) {
+  }
+
+  HandleScope* link_;
+  uint32_t number_of_references_;
+
+  // Storage for references: number_of_references_ entries are available when this scope is
+  // allocated and filled in by the jni_compiler.
+  StackReference<mirror::Object> references_[0];
+
+ private:
+  template<size_t kNumReferences> friend class StackHandleScope;
+  DISALLOW_COPY_AND_ASSIGN(HandleScope);
+};
+
+// A wrapper which wraps around Object** and restores the pointer in the destructor.
+// TODO: Add more functionality.
+template<class T>
+class HandleWrapper : public Handle<T> {
+ public:
+  HandleWrapper(T** obj, const Handle<T>& handle)
+     : Handle<T>(handle), obj_(obj) {
+  }
+
+  ~HandleWrapper() {
+    *obj_ = Handle<T>::Get();
+  }
+
+ private:
+  T** obj_;
+};
+
+// Scoped handle storage of a fixed size that is usually stack allocated.
+template<size_t kNumReferences>
+class StackHandleScope : public HandleScope {
+ public:
+  explicit StackHandleScope(Thread* self);
+  ~StackHandleScope();
+
+  template<class T>
+  Handle<T> NewHandle(T* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    SetReference(pos_, object);
+    return Handle<T>(GetHandle(pos_++));
+  }
+
+  template<class T>
+  HandleWrapper<T> NewHandleWrapper(T** object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    SetReference(pos_, *object);
+    Handle<T> h(GetHandle(pos_++));
+    return HandleWrapper<T>(object, h);
+  }
+
+ private:
+  // references_storage_ needs to be first so that it matches the address of references_.
+  StackReference<mirror::Object> references_storage_[kNumReferences];
+  Thread* const self_;
+  size_t pos_;
+
+  template<size_t kNumRefs> friend class StackHandleScope;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_HANDLE_SCOPE_H_
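
Together with handle.h, this is the idiom the converted tests above follow: one StackHandleScope<N>
is pushed per scope, NewHandle() hands out slots, and the destructor pops the scope from the
thread. A minimal usage sketch (AllocateSomething is a hypothetical placeholder for whatever
produces the object):

    void Example(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
      StackHandleScope<2> hs(self);  // Registers this scope with the thread.
      Handle<mirror::Object> obj(hs.NewHandle(AllocateSomething(self)));
      Handle<mirror::Object> spare(hs.NewHandle<mirror::Object>(nullptr));
      if (obj.Get() != nullptr) {   // Get() replaces SirtRef::get().
        spare.Assign(obj.Get());    // Assign() replaces SirtRef::reset().
      }
    }  // ~StackHandleScope pops the scope again.
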
diff --git a/runtime/stack_indirect_reference_table_test.cc b/runtime/handle_scope_test.cc
similarity index 60%
rename from runtime/stack_indirect_reference_table_test.cc
rename to runtime/handle_scope_test.cc
index 72ef6b6..de563c1 100644
--- a/runtime/stack_indirect_reference_table_test.cc
+++ b/runtime/handle_scope_test.cc
@@ -14,33 +14,49 @@
  * limitations under the License.
  */
 
-#include "stack_indirect_reference_table.h"
 #include "gtest/gtest.h"
+#include "handle_scope-inl.h"
+#include "scoped_thread_state_change.h"
+#include "thread.h"
 
 namespace art {
 
-// Test the offsets computed for members of StackIndirectReferenceTable. Because of cross-compiling
+// Handle scope with a fixed size which is allocated on the stack.
+template<size_t kNumReferences>
+class NoThreadStackHandleScope : public HandleScope {
+ public:
+  explicit NoThreadStackHandleScope() : HandleScope(kNumReferences) {
+  }
+  ~NoThreadStackHandleScope() {
+  }
+
+ private:
+  // references_storage_ needs to be first so that it matches the address of references_
+  StackReference<mirror::Object> references_storage_[kNumReferences];
+};
+
+// Test the offsets computed for members of HandleScope. Because of cross-compiling
 // it is impossible to use OFFSETOF_MEMBER, so we do some reasonable computations ourselves. This
 // test checks whether we do the right thing.
-TEST(StackIndirectReferenceTableTest, Offsets) {
-  // As the members of StackIndirectReferenceTable are private, we cannot use OFFSETOF_MEMBER
+TEST(HandleScopeTest, Offsets) NO_THREAD_SAFETY_ANALYSIS {
+  // As the members of HandleScope are private, we cannot use OFFSETOF_MEMBER
   // here. So do the inverse: set some data, and access it through pointers created from the offsets.
-
-  StackIndirectReferenceTable test_table(reinterpret_cast<mirror::Object*>(0x1234));
-  test_table.SetLink(reinterpret_cast<StackIndirectReferenceTable*>(0x5678));
+  NoThreadStackHandleScope<1> test_table;
+  test_table.SetReference(0, reinterpret_cast<mirror::Object*>(0x1234));
+  test_table.SetLink(reinterpret_cast<HandleScope*>(0x5678));
   test_table.SetNumberOfReferences(0x9ABC);
 
   byte* table_base_ptr = reinterpret_cast<byte*>(&test_table);
 
   {
     uintptr_t* link_ptr = reinterpret_cast<uintptr_t*>(table_base_ptr +
-        StackIndirectReferenceTable::LinkOffset(kPointerSize));
+        HandleScope::LinkOffset(kPointerSize));
     EXPECT_EQ(*link_ptr, static_cast<size_t>(0x5678));
   }
 
   {
     uint32_t* num_ptr = reinterpret_cast<uint32_t*>(table_base_ptr +
-        StackIndirectReferenceTable::NumberOfReferencesOffset(kPointerSize));
+        HandleScope::NumberOfReferencesOffset(kPointerSize));
     EXPECT_EQ(*num_ptr, static_cast<size_t>(0x9ABC));
   }
 
@@ -50,7 +66,7 @@
     EXPECT_EQ(sizeof(StackReference<mirror::Object>), sizeof(uint32_t));
 
     uint32_t* ref_ptr = reinterpret_cast<uint32_t*>(table_base_ptr +
-        StackIndirectReferenceTable::ReferencesOffset(kPointerSize));
+        HandleScope::ReferencesOffset(kPointerSize));
     EXPECT_EQ(*ref_ptr, static_cast<uint32_t>(0x1234));
   }
 }
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index fc8b594..91f1718 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -475,7 +475,7 @@
         }
       }
 
-      UniquePtr<File> file(new File(out_fd, filename_));
+      std::unique_ptr<File> file(new File(out_fd, filename_));
       okay = file->WriteFully(header_data_ptr_, header_data_size_) &&
           file->WriteFully(body_data_ptr_, body_data_size_);
       if (!okay) {
diff --git a/runtime/image.h b/runtime/image.h
index ce2bc58..abe1ad8 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -91,7 +91,7 @@
   static std::string GetOatLocationFromImageLocation(const std::string& image) {
     std::string oat_filename = image;
     if (oat_filename.length() <= 3) {
-      return oat_filename + ".oat";
+      oat_filename += ".oat";
     } else {
       oat_filename.replace(oat_filename.length() - 3, 3, "oat");
     }
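
For reference, the behaviour of the fixed branch on illustrative inputs (the short-name case now
modifies oat_filename in place and falls through instead of returning early):

    // "/system/framework/boot.art"  =>  "/system/framework/boot.oat"   (trailing "art" replaced)
    // "x"                           =>  "x.oat"                        (length <= 3, ".oat" appended)
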
diff --git a/runtime/indirect_reference_table-inl.h b/runtime/indirect_reference_table-inl.h
new file mode 100644
index 0000000..42a9757
--- /dev/null
+++ b/runtime/indirect_reference_table-inl.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INDIRECT_REFERENCE_TABLE_INL_H_
+#define ART_RUNTIME_INDIRECT_REFERENCE_TABLE_INL_H_
+
+#include "indirect_reference_table.h"
+
+#include "verify_object-inl.h"
+
+namespace art {
+namespace mirror {
+class Object;
+}  // namespace mirror
+
+// Verifies that the indirect table lookup is valid.
+// Returns "false" if something looks bad.
+inline bool IndirectReferenceTable::GetChecked(IndirectRef iref) const {
+  if (UNLIKELY(iref == nullptr)) {
+    LOG(WARNING) << "Attempt to look up NULL " << kind_;
+    return false;
+  }
+  if (UNLIKELY(GetIndirectRefKind(iref) == kHandleScopeOrInvalid)) {
+    LOG(ERROR) << "JNI ERROR (app bug): invalid " << kind_ << " " << iref;
+    AbortIfNoCheckJNI();
+    return false;
+  }
+  const int topIndex = segment_state_.parts.topIndex;
+  int idx = ExtractIndex(iref);
+  if (UNLIKELY(idx >= topIndex)) {
+    LOG(ERROR) << "JNI ERROR (app bug): accessed stale " << kind_ << " "
+               << iref << " (index " << idx << " in a table of size " << topIndex << ")";
+    AbortIfNoCheckJNI();
+    return false;
+  }
+  if (UNLIKELY(table_[idx] == nullptr)) {
+    LOG(ERROR) << "JNI ERROR (app bug): accessed deleted " << kind_ << " " << iref;
+    AbortIfNoCheckJNI();
+    return false;
+  }
+  if (UNLIKELY(!CheckEntry("use", iref, idx))) {
+    return false;
+  }
+  return true;
+}
+
+// Make sure that the entry at "idx" is correctly paired with "iref".
+inline bool IndirectReferenceTable::CheckEntry(const char* what, IndirectRef iref, int idx) const {
+  const mirror::Object* obj = table_[idx];
+  IndirectRef checkRef = ToIndirectRef(obj, idx);
+  if (UNLIKELY(checkRef != iref)) {
+    LOG(ERROR) << "JNI ERROR (app bug): attempt to " << what
+               << " stale " << kind_ << " " << iref
+               << " (should be " << checkRef << ")";
+    AbortIfNoCheckJNI();
+    return false;
+  }
+  return true;
+}
+
+inline mirror::Object* IndirectReferenceTable::Get(IndirectRef iref) const {
+  if (!GetChecked(iref)) {
+    return kInvalidIndirectRefObject;
+  }
+  mirror::Object* obj = table_[ExtractIndex(iref)];
+  if (LIKELY(obj != kClearedJniWeakGlobal)) {
+    VerifyObject(obj);
+  }
+  return obj;
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_INDIRECT_REFERENCE_TABLE_INL_H_
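
GetChecked(), CheckEntry() and Get() now live in this -inl header so that Get() can be inlined;
call sites that use Get() include the -inl file instead of indirect_reference_table.h, as the test
change further down does. A minimal sketch of a call site under that convention (Decode is a
hypothetical helper):

    #include "indirect_reference_table-inl.h"

    // Returns kInvalidIndirectRefObject if iref fails the checks in GetChecked().
    mirror::Object* Decode(const IndirectReferenceTable& table, IndirectRef iref)
        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
      return table.Get(iref);
    }
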
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index 987df91..432481b 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -14,7 +14,8 @@
  * limitations under the License.
  */
 
-#include "indirect_reference_table.h"
+#include "indirect_reference_table-inl.h"
+
 #include "jni_internal.h"
 #include "reference_table.h"
 #include "runtime.h"
@@ -53,7 +54,7 @@
   return os;
 }
 
-static void AbortMaybe() {
+void IndirectReferenceTable::AbortIfNoCheckJNI() {
   // If -Xcheck:jni is on, it'll give a more detailed error before aborting.
   if (!Runtime::Current()->GetJavaVM()->check_jni) {
     // Otherwise, we want to abort rather than hand back a bad reference.
@@ -65,14 +66,25 @@
                                                size_t maxCount, IndirectRefKind desiredKind) {
   CHECK_GT(initialCount, 0U);
   CHECK_LE(initialCount, maxCount);
-  CHECK_NE(desiredKind, kSirtOrInvalid);
+  CHECK_NE(desiredKind, kHandleScopeOrInvalid);
 
-  table_ = reinterpret_cast<mirror::Object**>(malloc(initialCount * sizeof(const mirror::Object*)));
-  CHECK(table_ != NULL);
-  memset(table_, 0xd1, initialCount * sizeof(const mirror::Object*));
+  std::string error_str;
+  const size_t initial_bytes = initialCount * sizeof(const mirror::Object*);
+  const size_t table_bytes = maxCount * sizeof(const mirror::Object*);
+  table_mem_map_.reset(MemMap::MapAnonymous("indirect ref table", nullptr, table_bytes,
+                                            PROT_READ | PROT_WRITE, false, &error_str));
+  CHECK(table_mem_map_.get() != nullptr) << error_str;
 
-  slot_data_ = reinterpret_cast<IndirectRefSlot*>(calloc(initialCount, sizeof(IndirectRefSlot)));
-  CHECK(slot_data_ != NULL);
+  table_ = reinterpret_cast<mirror::Object**>(table_mem_map_->Begin());
+  CHECK(table_ != nullptr);
+  memset(table_, 0xd1, initial_bytes);
+
+  const size_t slot_bytes = maxCount * sizeof(IndirectRefSlot);
+  slot_mem_map_.reset(MemMap::MapAnonymous("indirect ref table slots", nullptr, slot_bytes,
+                                           PROT_READ | PROT_WRITE, false, &error_str));
+  CHECK(slot_mem_map_.get() != nullptr) << error_str;
+  slot_data_ = reinterpret_cast<IndirectRefSlot*>(slot_mem_map_->Begin());
+  CHECK(slot_data_ != nullptr);
 
   segment_state_.all = IRT_FIRST_SEGMENT;
   alloc_entries_ = initialCount;
@@ -81,25 +93,6 @@
 }
 
 IndirectReferenceTable::~IndirectReferenceTable() {
-  free(table_);
-  free(slot_data_);
-  table_ = NULL;
-  slot_data_ = NULL;
-  alloc_entries_ = max_entries_ = -1;
-}
-
-// Make sure that the entry at "idx" is correctly paired with "iref".
-bool IndirectReferenceTable::CheckEntry(const char* what, IndirectRef iref, int idx) const {
-  const mirror::Object* obj = table_[idx];
-  IndirectRef checkRef = ToIndirectRef(obj, idx);
-  if (UNLIKELY(checkRef != iref)) {
-    LOG(ERROR) << "JNI ERROR (app bug): attempt to " << what
-               << " stale " << kind_ << " " << iref
-               << " (should be " << checkRef << ")";
-    AbortMaybe();
-    return false;
-  }
-  return true;
 }
 
 IndirectRef IndirectReferenceTable::Add(uint32_t cookie, mirror::Object* obj) {
@@ -127,20 +120,6 @@
     }
     DCHECK_GT(newSize, alloc_entries_);
 
-    table_ = reinterpret_cast<mirror::Object**>(realloc(table_, newSize * sizeof(mirror::Object*)));
-    slot_data_ = reinterpret_cast<IndirectRefSlot*>(realloc(slot_data_,
-                                                            newSize * sizeof(IndirectRefSlot)));
-    if (table_ == NULL || slot_data_ == NULL) {
-      LOG(FATAL) << "JNI ERROR (app bug): unable to expand "
-                 << kind_ << " table (from "
-                 << alloc_entries_ << " to " << newSize
-                 << ", max=" << max_entries_ << ")\n"
-                 << MutatorLockedDumpable<IndirectReferenceTable>(*this);
-    }
-
-    // Clear the newly-allocated slot_data_ elements.
-    memset(slot_data_ + alloc_entries_, 0, (newSize - alloc_entries_) * sizeof(IndirectRefSlot));
-
     alloc_entries_ = newSize;
   }
 
@@ -185,55 +164,6 @@
   }
 }
 
-// Verifies that the indirect table lookup is valid.
-// Returns "false" if something looks bad.
-bool IndirectReferenceTable::GetChecked(IndirectRef iref) const {
-  if (UNLIKELY(iref == NULL)) {
-    LOG(WARNING) << "Attempt to look up NULL " << kind_;
-    return false;
-  }
-  if (UNLIKELY(GetIndirectRefKind(iref) == kSirtOrInvalid)) {
-    LOG(ERROR) << "JNI ERROR (app bug): invalid " << kind_ << " " << iref;
-    AbortMaybe();
-    return false;
-  }
-
-  int topIndex = segment_state_.parts.topIndex;
-  int idx = ExtractIndex(iref);
-  if (UNLIKELY(idx >= topIndex)) {
-    LOG(ERROR) << "JNI ERROR (app bug): accessed stale " << kind_ << " "
-               << iref << " (index " << idx << " in a table of size " << topIndex << ")";
-    AbortMaybe();
-    return false;
-  }
-
-  if (UNLIKELY(table_[idx] == NULL)) {
-    LOG(ERROR) << "JNI ERROR (app bug): accessed deleted " << kind_ << " " << iref;
-    AbortMaybe();
-    return false;
-  }
-
-  if (UNLIKELY(!CheckEntry("use", iref, idx))) {
-    return false;
-  }
-
-  return true;
-}
-
-static int Find(mirror::Object* direct_pointer, int bottomIndex, int topIndex,
-                mirror::Object** table) {
-  for (int i = bottomIndex; i < topIndex; ++i) {
-    if (table[i] == direct_pointer) {
-      return i;
-    }
-  }
-  return -1;
-}
-
-bool IndirectReferenceTable::ContainsDirectPointer(mirror::Object* direct_pointer) const {
-  return Find(direct_pointer, 0, segment_state_.parts.topIndex, table_) != -1;
-}
-
 // Removes an object. We extract the table offset bits from "iref"
 // and zap the corresponding entry, leaving a hole if it's not at the top.
 // If the entry is not between the current top index and the bottom index
@@ -254,9 +184,9 @@
 
   int idx = ExtractIndex(iref);
 
-  if (GetIndirectRefKind(iref) == kSirtOrInvalid &&
-      Thread::Current()->SirtContains(reinterpret_cast<jobject>(iref))) {
-    LOG(WARNING) << "Attempt to remove local SIRT entry from IRT, ignoring";
+  if (GetIndirectRefKind(iref) == kHandleScopeOrInvalid &&
+      Thread::Current()->HandleScopeContains(reinterpret_cast<jobject>(iref))) {
+    LOG(WARNING) << "Attempt to remove local handle scope entry from IRT, ignoring";
     return true;
   }
 
@@ -346,15 +276,4 @@
   ReferenceTable::Dump(os, entries);
 }
 
-mirror::Object* IndirectReferenceTable::Get(IndirectRef iref) const {
-  if (!GetChecked(iref)) {
-    return kInvalidIndirectRefObject;
-  }
-  mirror::Object* obj = table_[ExtractIndex(iref)];;
-  if (obj != kClearedJniWeakGlobal) {
-    VerifyObject(obj);
-  }
-  return obj;
-}
-
 }  // namespace art
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index a2de726..5015410 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -24,6 +24,7 @@
 
 #include "base/logging.h"
 #include "base/mutex.h"
+#include "mem_map.h"
 #include "object_callbacks.h"
 #include "offsets.h"
 
@@ -72,7 +73,7 @@
  * To make everything fit nicely in 32-bit integers, the maximum size of
  * the table is capped at 64K.
  *
- * None of the table functions are synchronized.
+ * Only SynchronizedGet is synchronized.
  */
 
 /*
@@ -110,7 +111,7 @@
  * For convenience these match up with enum jobjectRefType from jni.h.
  */
 enum IndirectRefKind {
-  kSirtOrInvalid = 0,  // <<stack indirect reference table or invalid reference>>
+  kHandleScopeOrInvalid = 0,  // <<handle scope or invalid reference>>
   kLocal         = 1,  // <<local reference>>
   kGlobal        = 2,  // <<global reference>>
   kWeakGlobal    = 3   // <<weak global reference>>
@@ -191,11 +192,6 @@
  * and local refs to improve performance.  A large circular buffer might
  * reduce the amortized cost of adding global references.
  *
- * TODO: if we can guarantee that the underlying storage doesn't move,
- * e.g. by using oversized mmap regions to handle expanding tables, we may
- * be able to avoid having to synchronize lookups.  Might make sense to
- * add a "synchronized lookup" call that takes the mutex as an argument,
- * and either locks or doesn't lock based on internal details.
  */
 union IRTSegmentState {
   uint32_t          all;
@@ -234,7 +230,7 @@
     }
   }
 
-  mirror::Object** table_;
+  mirror::Object** const table_;
   size_t i_;
   size_t capacity_;
 };
@@ -267,10 +263,15 @@
    *
    * Returns kInvalidIndirectRefObject if iref is invalid.
    */
-  mirror::Object* Get(IndirectRef iref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::Object* Get(IndirectRef iref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      ALWAYS_INLINE;
 
-  // TODO: remove when we remove work_around_app_jni_bugs support.
-  bool ContainsDirectPointer(mirror::Object* direct_pointer) const;
+  // Synchronized get which reads a reference, acquiring a lock if necessary.
+  mirror::Object* SynchronizedGet(Thread* /*self*/, ReaderWriterMutex* /*mutex*/,
+                                  IndirectRef iref) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return Get(iref);
+  }
 
   /*
    * Remove an existing entry.
@@ -351,6 +352,9 @@
     }
   }
 
+  // Abort if check_jni is not enabled.
+  static void AbortIfNoCheckJNI();
+
   /* extra debugging checks */
   bool GetChecked(IndirectRef) const;
   bool CheckEntry(const char*, IndirectRef, int) const;
@@ -358,6 +362,10 @@
   /* semi-public - read/write by jni down calls */
   IRTSegmentState segment_state_;
 
+  // Mem map where we store the indirect refs.
+  std::unique_ptr<MemMap> table_mem_map_;
+  // Mem map where we store the extended debugging info.
+  std::unique_ptr<MemMap> slot_mem_map_;
   /* bottom of the stack */
   mirror::Object** table_;
   /* bit mask, ORed into all irefs */
diff --git a/runtime/indirect_reference_table_test.cc b/runtime/indirect_reference_table_test.cc
index 9b42e59..449817a 100644
--- a/runtime/indirect_reference_table_test.cc
+++ b/runtime/indirect_reference_table_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "indirect_reference_table.h"
+#include "indirect_reference_table-inl.h"
 
 #include "common_runtime_test.h"
 #include "mirror/object-inl.h"
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index bfbbbd6..679c575 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -35,7 +35,7 @@
 };
 std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs);
 
-const char* GetInstructionSetString(const InstructionSet isa);
+const char* GetInstructionSetString(InstructionSet isa);
 InstructionSet GetInstructionSetFromString(const char* instruction_set);
 
 size_t GetInstructionSetPointerSize(InstructionSet isa);
@@ -59,7 +59,8 @@
 #endif
 
 enum InstructionFeatures {
-  kHwDiv = 1                  // Supports hardware divide.
+  kHwDiv  = 0x1,              // Supports hardware divide.
+  kHwLpae = 0x2,              // Supports Large Physical Address Extension.
 };
 
 // This is a bitmask of supported features per architecture.
@@ -78,6 +79,14 @@
     mask_ = (mask_ & ~kHwDiv) | (v ? kHwDiv : 0);
   }
 
+  bool HasLpae() const {
+    return (mask_ & kHwLpae) != 0;
+  }
+
+  void SetHasLpae(bool v) {
+    mask_ = (mask_ & ~kHwLpae) | (v ? kHwLpae : 0);
+  }
+
   std::string GetFeatureString() const;
 
   // Other features in here.
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 77d29dd..2dbcc80 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -252,7 +252,7 @@
   }
 
   Instrumentation* instrumentation = reinterpret_cast<Instrumentation*>(arg);
-  UniquePtr<Context> context(Context::Create());
+  std::unique_ptr<Context> context(Context::Create());
   uintptr_t instrumentation_exit_pc = GetQuickInstrumentationExitPc();
   InstallStackVisitor visitor(thread, context.get(), instrumentation_exit_pc);
   visitor.WalkStack(true);
@@ -522,9 +522,9 @@
 void Instrumentation::InstrumentQuickAllocEntryPoints() {
   // TODO: the read of quick_alloc_entry_points_instrumentation_counter_ is racey and this code
   //       should be guarded by a lock.
-  DCHECK_GE(quick_alloc_entry_points_instrumentation_counter_.Load(), 0);
+  DCHECK_GE(quick_alloc_entry_points_instrumentation_counter_.LoadSequentiallyConsistent(), 0);
   const bool enable_instrumentation =
-      quick_alloc_entry_points_instrumentation_counter_.FetchAndAdd(1) == 0;
+      quick_alloc_entry_points_instrumentation_counter_.FetchAndAddSequentiallyConsistent(1) == 0;
   if (enable_instrumentation) {
     SetEntrypointsInstrumented(true);
   }
@@ -533,9 +533,9 @@
 void Instrumentation::UninstrumentQuickAllocEntryPoints() {
   // TODO: the read of quick_alloc_entry_points_instrumentation_counter_ is racey and this code
   //       should be guarded by a lock.
-  DCHECK_GT(quick_alloc_entry_points_instrumentation_counter_.Load(), 0);
+  DCHECK_GT(quick_alloc_entry_points_instrumentation_counter_.LoadSequentiallyConsistent(), 0);
   const bool disable_instrumentation =
-      quick_alloc_entry_points_instrumentation_counter_.FetchAndSub(1) == 1;
+      quick_alloc_entry_points_instrumentation_counter_.FetchAndSubSequentiallyConsistent(1) == 1;
   if (disable_instrumentation) {
     SetEntrypointsInstrumented(false);
   }
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 3de0728..5630862 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -238,6 +238,7 @@
 
   bool IsActive() const {
     return have_dex_pc_listeners_ || have_method_entry_listeners_ || have_method_exit_listeners_ ||
+        have_field_read_listeners_ || have_field_write_listeners_ ||
         have_exception_caught_listeners_ || have_method_unwind_listeners_;
   }
 
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index dfc82dd..339eb36 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -16,13 +16,14 @@
 
 #include "intern_table.h"
 
+#include <memory>
+
 #include "gc/space/image_space.h"
 #include "mirror/dex_cache.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
 #include "thread.h"
-#include "UniquePtr.h"
 #include "utf.h"
 
 namespace art {
@@ -83,7 +84,8 @@
 
 mirror::String* InternTable::Lookup(Table& table, mirror::String* s, int32_t hash_code) {
   Locks::intern_table_lock_->AssertHeld(Thread::Current());
-  for (auto it = table.find(hash_code), end = table.end(); it != end; ++it) {
+  for (auto it = table.lower_bound(hash_code), end = table.end();
+       it != end && it->first == hash_code; ++it) {
     mirror::String* existing_string = it->second;
     if (existing_string->Equals(s)) {
       return existing_string;
@@ -122,7 +124,8 @@
 }
 
 void InternTable::Remove(Table& table, mirror::String* s, int32_t hash_code) {
-  for (auto it = table.find(hash_code), end = table.end(); it != end; ++it) {
+  for (auto it = table.lower_bound(hash_code), end = table.end();
+       it != end && it->first == hash_code; ++it) {
     if (it->second == s) {
       table.erase(it);
       return;
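
The Lookup and Remove loops above switch from find() to lower_bound() with an explicit key
comparison, the usual way to visit exactly the std::multimap entries that share one key (here,
strings whose hash codes collide) and nothing past them. A self-contained sketch of the pattern
with plain standard-library types:

    #include <cstdint>
    #include <map>
    #include <string>

    using Table = std::multimap<int32_t, std::string>;

    const std::string* Lookup(const Table& table, int32_t hash_code, const std::string& s) {
      // Start at the first entry with this hash and stop as soon as the key changes.
      for (auto it = table.lower_bound(hash_code), end = table.end();
           it != end && it->first == hash_code; ++it) {
        if (it->second == s) {
          return &it->second;
        }
      }
      return nullptr;
    }
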
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 7dd06c6..47d5e09 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -105,7 +105,7 @@
   bool allow_new_interns_ GUARDED_BY(Locks::intern_table_lock_);
   ConditionVariable new_intern_condition_ GUARDED_BY(Locks::intern_table_lock_);
   Table strong_interns_ GUARDED_BY(Locks::intern_table_lock_);
-  std::vector<std::pair<int32_t, mirror::String*> > new_strong_intern_roots_
+  std::vector<std::pair<int32_t, mirror::String*>> new_strong_intern_roots_
       GUARDED_BY(Locks::intern_table_lock_);
   Table weak_interns_ GUARDED_BY(Locks::intern_table_lock_);
 };
diff --git a/runtime/intern_table_test.cc b/runtime/intern_table_test.cc
index 8987127..5995d9e 100644
--- a/runtime/intern_table_test.cc
+++ b/runtime/intern_table_test.cc
@@ -18,7 +18,7 @@
 
 #include "common_runtime_test.h"
 #include "mirror/object.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 
 namespace art {
 
@@ -27,19 +27,21 @@
 TEST_F(InternTableTest, Intern) {
   ScopedObjectAccess soa(Thread::Current());
   InternTable intern_table;
-  SirtRef<mirror::String> foo_1(soa.Self(), intern_table.InternStrong(3, "foo"));
-  SirtRef<mirror::String> foo_2(soa.Self(), intern_table.InternStrong(3, "foo"));
-  SirtRef<mirror::String> foo_3(soa.Self(), mirror::String::AllocFromModifiedUtf8(soa.Self(), "foo"));
-  SirtRef<mirror::String> bar(soa.Self(), intern_table.InternStrong(3, "bar"));
+  StackHandleScope<4> hs(soa.Self());
+  Handle<mirror::String> foo_1(hs.NewHandle(intern_table.InternStrong(3, "foo")));
+  Handle<mirror::String> foo_2(hs.NewHandle(intern_table.InternStrong(3, "foo")));
+  Handle<mirror::String> foo_3(
+      hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "foo")));
+  Handle<mirror::String> bar(hs.NewHandle(intern_table.InternStrong(3, "bar")));
   EXPECT_TRUE(foo_1->Equals("foo"));
   EXPECT_TRUE(foo_2->Equals("foo"));
   EXPECT_TRUE(foo_3->Equals("foo"));
-  EXPECT_TRUE(foo_1.get() != NULL);
-  EXPECT_TRUE(foo_2.get() != NULL);
-  EXPECT_EQ(foo_1.get(), foo_2.get());
-  EXPECT_NE(foo_1.get(), bar.get());
-  EXPECT_NE(foo_2.get(), bar.get());
-  EXPECT_NE(foo_3.get(), bar.get());
+  EXPECT_TRUE(foo_1.Get() != NULL);
+  EXPECT_TRUE(foo_2.Get() != NULL);
+  EXPECT_EQ(foo_1.Get(), foo_2.Get());
+  EXPECT_NE(foo_1.Get(), bar.Get());
+  EXPECT_NE(foo_2.Get(), bar.Get());
+  EXPECT_NE(foo_3.Get(), bar.Get());
 }
 
 TEST_F(InternTableTest, Size) {
@@ -47,8 +49,10 @@
   InternTable t;
   EXPECT_EQ(0U, t.Size());
   t.InternStrong(3, "foo");
-  SirtRef<mirror::String> foo(soa.Self(), mirror::String::AllocFromModifiedUtf8(soa.Self(), "foo"));
-  t.InternWeak(foo.get());
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::String> foo(
+      hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "foo")));
+  t.InternWeak(foo.Get());
   EXPECT_EQ(1U, t.Size());
   t.InternStrong(3, "bar");
   EXPECT_EQ(2U, t.Size());
@@ -93,19 +97,20 @@
   InternTable t;
   t.InternStrong(3, "foo");
   t.InternStrong(3, "bar");
-  SirtRef<mirror::String> hello(soa.Self(),
-                                mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello"));
-  SirtRef<mirror::String> world(soa.Self(),
-                                mirror::String::AllocFromModifiedUtf8(soa.Self(), "world"));
-  SirtRef<mirror::String> s0(soa.Self(), t.InternWeak(hello.get()));
-  SirtRef<mirror::String> s1(soa.Self(), t.InternWeak(world.get()));
+  StackHandleScope<5> hs(soa.Self());
+  Handle<mirror::String> hello(
+      hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello")));
+  Handle<mirror::String> world(
+      hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "world")));
+  Handle<mirror::String> s0(hs.NewHandle(t.InternWeak(hello.Get())));
+  Handle<mirror::String> s1(hs.NewHandle(t.InternWeak(world.Get())));
 
   EXPECT_EQ(4U, t.Size());
 
   // We should traverse only the weaks...
   TestPredicate p;
-  p.Expect(s0.get());
-  p.Expect(s1.get());
+  p.Expect(s0.Get());
+  p.Expect(s1.Get());
   {
     ReaderMutexLock mu(soa.Self(), *Locks::heap_bitmap_lock_);
     t.SweepInternTableWeaks(IsMarkedSweepingCallback, &p);
@@ -114,9 +119,9 @@
   EXPECT_EQ(2U, t.Size());
 
   // Just check that we didn't corrupt the map.
-  SirtRef<mirror::String> still_here(soa.Self(),
-                                     mirror::String::AllocFromModifiedUtf8(soa.Self(), "still here"));
-  t.InternWeak(still_here.get());
+  Handle<mirror::String> still_here(
+      hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "still here")));
+  t.InternWeak(still_here.Get());
   EXPECT_EQ(3U, t.Size());
 }
 
@@ -125,48 +130,53 @@
   {
     // Strongs are never weak.
     InternTable t;
-    SirtRef<mirror::String> interned_foo_1(soa.Self(), t.InternStrong(3, "foo"));
-    EXPECT_FALSE(t.ContainsWeak(interned_foo_1.get()));
-    SirtRef<mirror::String> interned_foo_2(soa.Self(), t.InternStrong(3, "foo"));
-    EXPECT_FALSE(t.ContainsWeak(interned_foo_2.get()));
-    EXPECT_EQ(interned_foo_1.get(), interned_foo_2.get());
+    StackHandleScope<2> hs(soa.Self());
+    Handle<mirror::String> interned_foo_1(hs.NewHandle(t.InternStrong(3, "foo")));
+    EXPECT_FALSE(t.ContainsWeak(interned_foo_1.Get()));
+    Handle<mirror::String> interned_foo_2(hs.NewHandle(t.InternStrong(3, "foo")));
+    EXPECT_FALSE(t.ContainsWeak(interned_foo_2.Get()));
+    EXPECT_EQ(interned_foo_1.Get(), interned_foo_2.Get());
   }
 
   {
     // Weaks are always weak.
     InternTable t;
-    SirtRef<mirror::String> foo_1(soa.Self(),
-                                  mirror::String::AllocFromModifiedUtf8(soa.Self(), "foo"));
-    SirtRef<mirror::String> foo_2(soa.Self(),
-                                  mirror::String::AllocFromModifiedUtf8(soa.Self(), "foo"));
-    EXPECT_NE(foo_1.get(), foo_2.get());
-    SirtRef<mirror::String> interned_foo_1(soa.Self(), t.InternWeak(foo_1.get()));
-    SirtRef<mirror::String> interned_foo_2(soa.Self(), t.InternWeak(foo_2.get()));
-    EXPECT_TRUE(t.ContainsWeak(interned_foo_2.get()));
-    EXPECT_EQ(interned_foo_1.get(), interned_foo_2.get());
+    StackHandleScope<4> hs(soa.Self());
+    Handle<mirror::String> foo_1(
+        hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "foo")));
+    Handle<mirror::String> foo_2(
+        hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "foo")));
+    EXPECT_NE(foo_1.Get(), foo_2.Get());
+    Handle<mirror::String> interned_foo_1(hs.NewHandle(t.InternWeak(foo_1.Get())));
+    Handle<mirror::String> interned_foo_2(hs.NewHandle(t.InternWeak(foo_2.Get())));
+    EXPECT_TRUE(t.ContainsWeak(interned_foo_2.Get()));
+    EXPECT_EQ(interned_foo_1.Get(), interned_foo_2.Get());
   }
 
   {
     // A weak can be promoted to a strong.
     InternTable t;
-    SirtRef<mirror::String> foo(soa.Self(), mirror::String::AllocFromModifiedUtf8(soa.Self(), "foo"));
-    SirtRef<mirror::String> interned_foo_1(soa.Self(), t.InternWeak(foo.get()));
-    EXPECT_TRUE(t.ContainsWeak(interned_foo_1.get()));
-    SirtRef<mirror::String> interned_foo_2(soa.Self(), t.InternStrong(3, "foo"));
-    EXPECT_FALSE(t.ContainsWeak(interned_foo_2.get()));
-    EXPECT_EQ(interned_foo_1.get(), interned_foo_2.get());
+    StackHandleScope<3> hs(soa.Self());
+    Handle<mirror::String> foo(
+        hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "foo")));
+    Handle<mirror::String> interned_foo_1(hs.NewHandle(t.InternWeak(foo.Get())));
+    EXPECT_TRUE(t.ContainsWeak(interned_foo_1.Get()));
+    Handle<mirror::String> interned_foo_2(hs.NewHandle(t.InternStrong(3, "foo")));
+    EXPECT_FALSE(t.ContainsWeak(interned_foo_2.Get()));
+    EXPECT_EQ(interned_foo_1.Get(), interned_foo_2.Get());
   }
 
   {
     // Interning a weak after a strong gets you the strong.
     InternTable t;
-    SirtRef<mirror::String> interned_foo_1(soa.Self(), t.InternStrong(3, "foo"));
-    EXPECT_FALSE(t.ContainsWeak(interned_foo_1.get()));
-    SirtRef<mirror::String> foo(soa.Self(),
-                                mirror::String::AllocFromModifiedUtf8(soa.Self(), "foo"));
-    SirtRef<mirror::String> interned_foo_2(soa.Self(), t.InternWeak(foo.get()));
-    EXPECT_FALSE(t.ContainsWeak(interned_foo_2.get()));
-    EXPECT_EQ(interned_foo_1.get(), interned_foo_2.get());
+    StackHandleScope<3> hs(soa.Self());
+    Handle<mirror::String> interned_foo_1(hs.NewHandle(t.InternStrong(3, "foo")));
+    EXPECT_FALSE(t.ContainsWeak(interned_foo_1.Get()));
+    Handle<mirror::String> foo(
+        hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "foo")));
+    Handle<mirror::String> interned_foo_2(hs.NewHandle(t.InternWeak(foo.Get())));
+    EXPECT_FALSE(t.ContainsWeak(interned_foo_2.Get()));
+    EXPECT_EQ(interned_foo_1.Get(), interned_foo_2.Get());
   }
 }
 
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 1e1a8c1..f77a0f6 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -27,12 +27,18 @@
 static void UnstartedRuntimeJni(Thread* self, ArtMethod* method,
                                 Object* receiver, uint32_t* args, JValue* result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(Runtime::Current()->IsActiveTransaction()) << "Calling native method "
-                                                    << PrettyMethod(method)
-                                                    << " in unstarted runtime should only happen"
-                                                    << " in a transaction";
   std::string name(PrettyMethod(method));
-  if (name == "java.lang.ClassLoader dalvik.system.VMStack.getCallingClassLoader()") {
+  if (name == "java.lang.Object dalvik.system.VMRuntime.newUnpaddedArray(java.lang.Class, int)") {
+    int32_t length = args[1];
+    DCHECK_GE(length, 0);
+    mirror::Class* element_class = reinterpret_cast<Object*>(args[0])->AsClass();
+    Runtime* runtime = Runtime::Current();
+    mirror::Class* array_class = runtime->GetClassLinker()->FindArrayClass(self, element_class);
+    DCHECK(array_class != nullptr);
+    gc::AllocatorType allocator = runtime->GetHeap()->GetCurrentAllocator();
+    result->SetL(mirror::Array::Alloc<true>(self, array_class, length,
+                                            array_class->GetComponentSize(), allocator, true));
+  } else if (name == "java.lang.ClassLoader dalvik.system.VMStack.getCallingClassLoader()") {
     result->SetL(NULL);
   } else if (name == "java.lang.Class dalvik.system.VMStack.getStackClass2()") {
     NthCallerVisitor visitor(self, 3);
@@ -43,7 +49,8 @@
     value.SetJ((static_cast<uint64_t>(args[1]) << 32) | args[0]);
     result->SetD(log(value.GetD()));
   } else if (name == "java.lang.String java.lang.Class.getNameNative()") {
-    result->SetL(receiver->AsClass()->ComputeName());
+    StackHandleScope<1> hs(self);
+    result->SetL(mirror::Class::ComputeName(hs.NewHandle(receiver->AsClass())));
   } else if (name == "int java.lang.Float.floatToRawIntBits(float)") {
     result->SetI(args[0]);
   } else if (name == "float java.lang.Float.intBitsToFloat(int)") {
@@ -65,13 +72,17 @@
   } else if (name == "int java.lang.String.fastIndexOf(int, int)") {
     result->SetI(receiver->AsString()->FastIndexOf(args[0], args[1]));
   } else if (name == "java.lang.Object java.lang.reflect.Array.createMultiArray(java.lang.Class, int[])") {
-    SirtRef<mirror::Class> sirt_class(self, reinterpret_cast<Object*>(args[0])->AsClass());
-    SirtRef<mirror::IntArray> sirt_dimensions(self,
-                                              reinterpret_cast<Object*>(args[1])->AsIntArray());
-    result->SetL(Array::CreateMultiArray(self, sirt_class, sirt_dimensions));
+    StackHandleScope<2> hs(self);
+    auto h_class(hs.NewHandle(reinterpret_cast<mirror::Class*>(args[0])->AsClass()));
+    auto h_dimensions(hs.NewHandle(reinterpret_cast<mirror::IntArray*>(args[1])->AsIntArray()));
+    result->SetL(Array::CreateMultiArray(self, h_class, h_dimensions));
   } else if (name == "java.lang.Object java.lang.Throwable.nativeFillInStackTrace()") {
     ScopedObjectAccessUnchecked soa(self);
-    result->SetL(soa.Decode<Object*>(self->CreateInternalStackTrace<true>(soa)));
+    if (Runtime::Current()->IsActiveTransaction()) {
+      result->SetL(soa.Decode<Object*>(self->CreateInternalStackTrace<true>(soa)));
+    } else {
+      result->SetL(soa.Decode<Object*>(self->CreateInternalStackTrace<false>(soa)));
+    }
   } else if (name == "int java.lang.System.identityHashCode(java.lang.Object)") {
     mirror::Object* obj = reinterpret_cast<Object*>(args[0]);
     result->SetI((obj != nullptr) ? obj->IdentityHashCode() : 0);
@@ -82,13 +93,22 @@
     jlong offset = (static_cast<uint64_t>(args[2]) << 32) | args[1];
     jint expectedValue = args[3];
     jint newValue = args[4];
-    bool success = obj->CasField32<true>(MemberOffset(offset), expectedValue, newValue);
+    bool success;
+    if (Runtime::Current()->IsActiveTransaction()) {
+      success = obj->CasField32<true>(MemberOffset(offset), expectedValue, newValue);
+    } else {
+      success = obj->CasField32<false>(MemberOffset(offset), expectedValue, newValue);
+    }
     result->SetZ(success ? JNI_TRUE : JNI_FALSE);
   } else if (name == "void sun.misc.Unsafe.putObject(java.lang.Object, long, java.lang.Object)") {
     Object* obj = reinterpret_cast<Object*>(args[0]);
     jlong offset = (static_cast<uint64_t>(args[2]) << 32) | args[1];
     Object* newValue = reinterpret_cast<Object*>(args[3]);
-    obj->SetFieldObject<true>(MemberOffset(offset), newValue);
+    if (Runtime::Current()->IsActiveTransaction()) {
+      obj->SetFieldObject<true>(MemberOffset(offset), newValue);
+    } else {
+      obj->SetFieldObject<false>(MemberOffset(offset), newValue);
+    }
   } else if (name == "int sun.misc.Unsafe.getArrayBaseOffsetForComponentType(java.lang.Class)") {
     mirror::Class* component = reinterpret_cast<Object*>(args[0])->AsClass();
     Primitive::Type primitive_type = component->GetPrimitiveType();
@@ -97,9 +117,13 @@
     mirror::Class* component = reinterpret_cast<Object*>(args[0])->AsClass();
     Primitive::Type primitive_type = component->GetPrimitiveType();
     result->SetI(Primitive::ComponentSize(primitive_type));
-  } else {
+  } else if (Runtime::Current()->IsActiveTransaction()) {
     AbortTransaction(self, "Attempt to invoke native method in non-started runtime: %s",
                      name.c_str());
+
+  } else {
+    LOG(FATAL) << "Calling native method " << PrettyMethod(method) << " in an unstarted "
+        "non-transactional runtime";
   }
 }
 
@@ -432,8 +456,9 @@
   // Do this after populating the shadow frame in case EnsureInitialized causes a GC.
   if (method->IsStatic() && UNLIKELY(!method->GetDeclaringClass()->IsInitializing())) {
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    SirtRef<mirror::Class> sirt_c(self, method->GetDeclaringClass());
-    if (UNLIKELY(!class_linker->EnsureInitialized(sirt_c, true, true))) {
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
+    if (UNLIKELY(!class_linker->EnsureInitialized(h_class, true, true))) {
       CHECK(self->IsExceptionPending());
       self->PopShadowFrame();
       return;
@@ -499,15 +524,17 @@
   ArtMethod* method = shadow_frame->GetMethod();
   // Ensure static methods are initialized.
   if (method->IsStatic()) {
-    SirtRef<Class> declaringClass(self, method->GetDeclaringClass());
-    if (UNLIKELY(!declaringClass->IsInitializing())) {
-      if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(declaringClass, true,
-                                                                            true))) {
-        DCHECK(Thread::Current()->IsExceptionPending());
+    mirror::Class* declaring_class = method->GetDeclaringClass();
+    if (UNLIKELY(!declaring_class->IsInitializing())) {
+      StackHandleScope<1> hs(self);
+      HandleWrapper<Class> h_declaring_class(hs.NewHandleWrapper(&declaring_class));
+      if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(
+          h_declaring_class, true, true))) {
+        DCHECK(self->IsExceptionPending());
         self->PopShadowFrame();
         return;
       }
-      CHECK(declaringClass->IsInitializing());
+      CHECK(h_declaring_class->IsInitializing());
     }
   }
 
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 5660508..63ae6fd 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -85,7 +85,7 @@
     if (is_range) {
       vregC = inst->VRegC_3rc();
     } else {
-      inst->GetArgs(arg, inst_data);
+      inst->GetVarArgs(arg, inst_data);
     }
 
     // Handle receiver apart since it's not part of the shorty.
@@ -117,8 +117,8 @@
                                        "Ljava/lang/VirtualMachineError;",
                                        "Invoking %s with bad arg %d, type '%s' not instance of '%s'",
                                        mh.GetName(), shorty_pos,
-                                       ClassHelper(o->GetClass()).GetDescriptor(),
-                                       ClassHelper(arg_type).GetDescriptor());
+                                       o->GetClass()->GetDescriptor().c_str(),
+                                       arg_type->GetDescriptor().c_str());
               return false;
             }
           }
@@ -224,7 +224,7 @@
   if (is_range) {
     vregC = inst->VRegC_3rc();
   } else {
-    inst->GetArgs(arg);
+    inst->GetVarArgs(arg);
   }
   const bool is_primitive_int_component = componentClass->IsPrimitiveInt();
   for (int32_t i = 0; i < length; ++i) {
@@ -296,14 +296,18 @@
     // other variants that take more arguments should also be added.
     std::string descriptor(DotToDescriptor(shadow_frame->GetVRegReference(arg_offset)->AsString()->ToModifiedUtf8().c_str()));
 
-    SirtRef<ClassLoader> class_loader(self, nullptr);  // shadow_frame.GetMethod()->GetDeclaringClass()->GetClassLoader();
-    Class* found = Runtime::Current()->GetClassLinker()->FindClass(self, descriptor.c_str(),
-                                                                   class_loader);
+    // shadow_frame.GetMethod()->GetDeclaringClass()->GetClassLoader();
+    Class* found = Runtime::Current()->GetClassLinker()->FindClass(
+        self, descriptor.c_str(), NullHandle<mirror::ClassLoader>());
     CHECK(found != NULL) << "Class.forName failed in un-started runtime for class: "
         << PrettyDescriptor(descriptor);
     result->SetL(found);
+  } else if (name == "java.lang.Class java.lang.Void.lookupType()") {
+    result->SetL(Runtime::Current()->GetClassLinker()->FindPrimitiveClass('V'));
   } else if (name == "java.lang.Class java.lang.VMClassLoader.findLoadedClass(java.lang.ClassLoader, java.lang.String)") {
-    SirtRef<ClassLoader> class_loader(self, down_cast<mirror::ClassLoader*>(shadow_frame->GetVRegReference(arg_offset)));
+    StackHandleScope<1> hs(self);
+    Handle<ClassLoader> class_loader(
+        hs.NewHandle(down_cast<mirror::ClassLoader*>(shadow_frame->GetVRegReference(arg_offset))));
     std::string descriptor(DotToDescriptor(shadow_frame->GetVRegReference(arg_offset + 1)->AsString()->ToModifiedUtf8().c_str()));
 
     Class* found = Runtime::Current()->GetClassLinker()->FindClass(self, descriptor.c_str(),
@@ -313,10 +317,11 @@
     Class* klass = shadow_frame->GetVRegReference(arg_offset)->AsClass();
     ArtMethod* c = klass->FindDeclaredDirectMethod("<init>", "()V");
     CHECK(c != NULL);
-    SirtRef<Object> obj(self, klass->AllocObject(self));
-    CHECK(obj.get() != NULL);
-    EnterInterpreterFromInvoke(self, c, obj.get(), NULL, NULL);
-    result->SetL(obj.get());
+    StackHandleScope<1> hs(self);
+    Handle<Object> obj(hs.NewHandle(klass->AllocObject(self)));
+    CHECK(obj.Get() != NULL);
+    EnterInterpreterFromInvoke(self, c, obj.Get(), NULL, NULL);
+    result->SetL(obj.Get());
   } else if (name == "java.lang.reflect.Field java.lang.Class.getDeclaredField(java.lang.String)") {
     // Special managed code cut-out to allow field lookup in a un-started runtime that'd fail
     // going the reflective Dex way.
@@ -348,13 +353,20 @@
     // TODO: getDeclaredField calls GetType once the field is found to ensure a
     //       NoClassDefFoundError is thrown if the field's type cannot be resolved.
     Class* jlr_Field = self->DecodeJObject(WellKnownClasses::java_lang_reflect_Field)->AsClass();
-    SirtRef<Object> field(self, jlr_Field->AllocNonMovableObject(self));
-    CHECK(field.get() != NULL);
+    StackHandleScope<1> hs(self);
+    Handle<Object> field(hs.NewHandle(jlr_Field->AllocNonMovableObject(self)));
+    CHECK(field.Get() != NULL);
     ArtMethod* c = jlr_Field->FindDeclaredDirectMethod("<init>", "(Ljava/lang/reflect/ArtField;)V");
     uint32_t args[1];
     args[0] = StackReference<mirror::Object>::FromMirrorPtr(found).AsVRegValue();
-    EnterInterpreterFromInvoke(self, c, field.get(), args, NULL);
-    result->SetL(field.get());
+    EnterInterpreterFromInvoke(self, c, field.Get(), args, NULL);
+    result->SetL(field.Get());
+  } else if (name == "int java.lang.Object.hashCode()") {
+    Object* obj = shadow_frame->GetVRegReference(arg_offset);
+    result->SetI(obj->IdentityHashCode());
+  } else if (name == "java.lang.String java.lang.reflect.ArtMethod.getMethodName(java.lang.reflect.ArtMethod)") {
+    ArtMethod* method = shadow_frame->GetVRegReference(arg_offset)->AsArtMethod();
+    result->SetL(MethodHelper(method).GetNameAsString());
   } else if (name == "void java.lang.System.arraycopy(java.lang.Object, int, java.lang.Object, int, int)" ||
              name == "void java.lang.System.arraycopy(char[], int, char[], int, int)") {
     // Special case array copying without initializing System.
@@ -381,7 +393,18 @@
         dst->Set(dstPos + i, src->Get(srcPos + i));
       }
     } else {
-      UNIMPLEMENTED(FATAL) << "System.arraycopy of unexpected type: " << PrettyDescriptor(ctype);
+      self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(), "Ljava/lang/InternalError;",
+                               "Unimplemented System.arraycopy for type '%s'",
+                               PrettyDescriptor(ctype).c_str());
+    }
+    } else if (name == "java.lang.Object java.lang.ThreadLocal.get()") {
+    std::string caller(PrettyMethod(shadow_frame->GetLink()->GetMethod()));
+    if (caller == "java.lang.String java.lang.IntegralToString.convertInt(java.lang.AbstractStringBuilder, int)") {
+      // Allocate non-threadlocal buffer.
+      result->SetL(mirror::CharArray::Alloc(self, 11));
+    } else {
+      self->ThrowNewException(self->GetCurrentLocationForThrow(), "Ljava/lang/InternalError;",
+                              "Unimplemented ThreadLocal.get");
     }
   } else {
     // Not special, continue with regular interpreter execution.
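
The new branches above extend the unstarted-runtime "cut-outs": a handful of well-known methods are matched by their pretty-printed signature and emulated directly, and anything unmatched falls through to normal interpretation. A toy dispatcher in the same spirit (hypothetical names and return values, not the ART interpreter):

    #include <cstdio>
    #include <string>

    struct JValue { long j = 0; };

    bool UnstartedRuntimeInvoke(const std::string& name, JValue* result) {
      if (name == "int java.lang.Object.hashCode()") {
        result->j = 42;  // stand-in for obj->IdentityHashCode()
        return true;
      }
      if (name == "java.lang.Object java.lang.ThreadLocal.get()") {
        result->j = 0;   // stand-in for handing back a freshly allocated buffer
        return true;
      }
      return false;  // not special: continue with regular interpreter execution
    }

    int main() {
      JValue result;
      const char* names[] = {"int java.lang.Object.hashCode()", "void foo.bar()"};
      for (const char* name : names) {
        std::printf("%s -> %s\n", name,
                    UnstartedRuntimeInvoke(name, &result) ? "emulated" : "interpreted");
      }
      return 0;
    }
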
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 819b79d..cfc90a6 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -29,6 +29,7 @@
 #include "dex_instruction.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "gc/accounting/card_table-inl.h"
+#include "handle_scope-inl.h"
 #include "nth_caller_visitor.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method.h"
@@ -59,6 +60,15 @@
 using ::art::mirror::String;
 using ::art::mirror::Throwable;
 
+// b/14882674 Workaround stack overflow issue with clang
+#if defined(__clang__) && defined(__aarch64__)
+#define SOMETIMES_INLINE __attribute__((noinline))
+#define SOMETIMES_INLINE_KEYWORD
+#else
+#define SOMETIMES_INLINE ALWAYS_INLINE
+#define SOMETIMES_INLINE_KEYWORD inline
+#endif
+
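
SOMETIMES_INLINE trades inlining for stack depth only on the clang/aarch64 combination named in b/14882674. A minimal, self-contained illustration of gating an inlining attribute on compiler/architecture defines (simplified, not the ART macro pair itself):

    #include <cstdio>

    // Simplified version of the idea: noinline where aggressive inlining
    // overflows the stack, always_inline everywhere else.
    #if defined(__clang__) && defined(__aarch64__)
    #define MAYBE_INLINE __attribute__((noinline))
    #else
    #define MAYBE_INLINE __attribute__((always_inline)) inline
    #endif

    MAYBE_INLINE static int AddOne(int x) { return x + 1; }

    int main() {
      std::printf("%d\n", AddOne(41));
      return 0;
    }
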
 namespace art {
 namespace interpreter {
 
@@ -103,9 +113,10 @@
   const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
   const uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
   Object* receiver = (type == kStatic) ? nullptr : shadow_frame.GetVRegReference(vregC);
-  ArtMethod* const method = FindMethodFromCode<type, do_access_check>(method_idx, receiver,
-                                                                      shadow_frame.GetMethod(),
-                                                                      self);
+  mirror::ArtMethod* sf_method = shadow_frame.GetMethod();
+  ArtMethod* const method = FindMethodFromCode<type, do_access_check>(
+      method_idx, &receiver, &sf_method, self);
+  // The shadow frame should already be pushed, so we don't need to update it.
   if (UNLIKELY(method == nullptr)) {
     CHECK(self->IsExceptionPending());
     result->SetJ(0);
@@ -152,8 +163,8 @@
 // Handles iget-XXX and sget-XXX instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check>
-static inline bool DoFieldGet(Thread* self, ShadowFrame& shadow_frame,
-                              const Instruction* inst, uint16_t inst_data) {
+static SOMETIMES_INLINE_KEYWORD bool DoFieldGet(Thread* self, ShadowFrame& shadow_frame,
+                                                const Instruction* inst, uint16_t inst_data) {
   const bool is_static = (find_type == StaticObjectRead) || (find_type == StaticPrimitiveRead);
   const uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
   ArtField* f = FindFieldFromCode<find_type, do_access_check>(field_idx, shadow_frame.GetMethod(), self,
@@ -211,7 +222,7 @@
 // Handles iget-quick, iget-wide-quick and iget-object-quick instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<Primitive::Type field_type>
-static inline bool DoIGetQuick(ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
+static SOMETIMES_INLINE_KEYWORD bool DoIGetQuick(ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
   Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
   if (UNLIKELY(obj == nullptr)) {
     // We lost the reference to the field index so we cannot get a more
@@ -285,8 +296,8 @@
 // Handles iput-XXX and sput-XXX instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check, bool transaction_active>
-static inline bool DoFieldPut(Thread* self, const ShadowFrame& shadow_frame,
-                              const Instruction* inst, uint16_t inst_data) {
+static SOMETIMES_INLINE_KEYWORD bool DoFieldPut(Thread* self, const ShadowFrame& shadow_frame,
+                                                const Instruction* inst, uint16_t inst_data) {
   bool do_assignability_check = do_access_check;
   bool is_static = (find_type == StaticObjectWrite) || (find_type == StaticPrimitiveWrite);
   uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
@@ -339,15 +350,19 @@
     case Primitive::kPrimNot: {
       Object* reg = shadow_frame.GetVRegReference(vregA);
       if (do_assignability_check && reg != nullptr) {
+        // FieldHelper::GetType can resolve classes, use a handle wrapper which will restore the
+        // object in the destructor.
+        StackHandleScope<1> hs(self);
+        HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(&obj));
         Class* field_class = FieldHelper(f).GetType();
         if (!reg->VerifierInstanceOf(field_class)) {
           // This should never happen.
           self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
                                    "Ljava/lang/VirtualMachineError;",
                                    "Put '%s' that is not instance of field '%s' in '%s'",
-                                   ClassHelper(reg->GetClass()).GetDescriptor(),
-                                   ClassHelper(field_class).GetDescriptor(),
-                                   ClassHelper(f->GetDeclaringClass()).GetDescriptor());
+                                   reg->GetClass()->GetDescriptor().c_str(),
+                                   field_class->GetDescriptor().c_str(),
+                                   f->GetDeclaringClass()->GetDescriptor().c_str());
           return false;
         }
       }
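
The comment above is the key to this hunk: FieldHelper::GetType may resolve classes and therefore trigger a collection that moves obj, so obj is wrapped in a HandleWrapper that writes the possibly relocated address back into the local when the scope ends. A toy model of that write-back-on-destruction behaviour, with simulated object movement instead of a real GC and none of the ART HandleScope machinery:

    #include <cassert>

    struct Obj { int data = 0; };

    class ScopedHandleWrapper {
     public:
      explicit ScopedHandleWrapper(Obj** slot) : slot_(slot), pinned_(*slot) {}
      // A moving collector would update pinned_; simulate that explicitly.
      void SimulateMove(Obj* new_location) { pinned_ = new_location; }
      ~ScopedHandleWrapper() { *slot_ = pinned_; }  // write the new address back

     private:
      Obj** slot_;
      Obj* pinned_;
    };

    int main() {
      Obj before, after;
      Obj* obj = &before;
      {
        ScopedHandleWrapper wrapper(&obj);
        wrapper.SimulateMove(&after);  // pretend the GC moved the object
      }
      assert(obj == &after);  // the raw local now points at the new location
      return 0;
    }
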
@@ -363,7 +378,8 @@
 // Handles iput-quick, iput-wide-quick and iput-object-quick instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<Primitive::Type field_type, bool transaction_active>
-static inline bool DoIPutQuick(const ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
+static SOMETIMES_INLINE_KEYWORD bool DoIPutQuick(const ShadowFrame& shadow_frame,
+                                                 const Instruction* inst, uint16_t inst_data) {
   Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
   if (UNLIKELY(obj == nullptr)) {
     // We lost the reference to the field index so we cannot get a more
@@ -410,8 +426,9 @@
   Class* java_lang_string_class = String::GetJavaLangString();
   if (UNLIKELY(!java_lang_string_class->IsInitialized())) {
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    SirtRef<mirror::Class> sirt_class(self, java_lang_string_class);
-    if (UNLIKELY(!class_linker->EnsureInitialized(sirt_class, true, true))) {
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> h_class(hs.NewHandle(java_lang_string_class));
+    if (UNLIKELY(!class_linker->EnsureInitialized(h_class, true, true))) {
       DCHECK(self->IsExceptionPending());
       return nullptr;
     }
@@ -559,21 +576,27 @@
                                                              uint32_t dex_pc,
                                                              mirror::Object* this_object,
                                                              const instrumentation::Instrumentation* instrumentation)
-    ALWAYS_INLINE;
+SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE;
 
 static inline uint32_t FindNextInstructionFollowingException(Thread* self,
                                                              ShadowFrame& shadow_frame,
                                                              uint32_t dex_pc,
                                                              mirror::Object* this_object,
-                                                             const instrumentation::Instrumentation* instrumentation)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+                                                             const instrumentation::Instrumentation* instrumentation) {
   self->VerifyStack();
   ThrowLocation throw_location;
   mirror::Throwable* exception = self->GetException(&throw_location);
   bool clear_exception = false;
-  SirtRef<mirror::Class> exception_class(self, exception->GetClass());
+  bool new_exception = false;
+  StackHandleScope<3> hs(self);
+  Handle<mirror::Class> exception_class(hs.NewHandle(exception->GetClass()));
   uint32_t found_dex_pc = shadow_frame.GetMethod()->FindCatchBlock(exception_class, dex_pc,
-                                                                   &clear_exception);
+                                                                   &clear_exception,
+                                                                   &new_exception);
+  if (UNLIKELY(new_exception)) {
+    // Update the exception.
+    exception = self->GetException(&throw_location);
+  }
   if (found_dex_pc == DexFile::kDexNoIndex) {
     instrumentation->MethodUnwindEvent(self, this_object,
                                        shadow_frame.GetMethod(), dex_pc);
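
FindCatchBlock gains a new_exception out-parameter because the handler search can itself replace the pending exception (for instance if resolving a catch type throws), so the caller must re-read the exception before dispatching. A standalone toy of that re-read-after-call pattern, with a hypothetical Thread struct and FindCatchBlock in place of the runtime types:

    #include <cassert>
    #include <string>

    struct Thread {
      std::string pending_exception;
    };

    // Hypothetical catch-block search: it may replace the pending exception and
    // reports that through *new_exception.
    int FindCatchBlock(Thread* self, bool* new_exception) {
      *new_exception = true;
      self->pending_exception = "java.lang.NoClassDefFoundError";
      return -1;  // no handler found
    }

    int main() {
      Thread self{"java.lang.ArithmeticException"};
      std::string exception = self.pending_exception;
      bool new_exception = false;
      int found_dex_pc = FindCatchBlock(&self, &new_exception);
      if (new_exception) {
        exception = self.pending_exception;  // pick up the replacement exception
      }
      assert(exception == "java.lang.NoClassDefFoundError");
      return found_dex_pc == -1 ? 0 : 1;
    }
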
@@ -631,7 +654,7 @@
 
 // Explicitly instantiate all DoInvoke functions.
 #define EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, _is_range, _do_check)                      \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                       \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE                    \
   bool DoInvoke<_type, _is_range, _do_check>(Thread* self, ShadowFrame& shadow_frame,      \
                                              const Instruction* inst, uint16_t inst_data,  \
                                              JValue* result)
@@ -652,7 +675,7 @@
 
 // Explicitly instantiate all DoFieldGet functions.
 #define EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL(_find_type, _field_type, _do_check)                \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                           \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE                        \
   bool DoFieldGet<_find_type, _field_type, _do_check>(Thread* self, ShadowFrame& shadow_frame, \
                                                       const Instruction* inst, uint16_t inst_data)
 
@@ -683,7 +706,7 @@
 
 // Explicitly instantiate all DoFieldPut functions.
 #define EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, _do_check, _transaction_active)                      \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                                 \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE                                                   \
   bool DoFieldPut<_find_type, _field_type, _do_check, _transaction_active>(Thread* self, const ShadowFrame& shadow_frame, \
                                                                            const Instruction* inst, uint16_t inst_data)
 
@@ -716,7 +739,7 @@
 
 // Explicitly instantiate all DoInvokeVirtualQuick functions.
 #define EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(_is_range)                    \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                 \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE              \
   bool DoInvokeVirtualQuick<_is_range>(Thread* self, ShadowFrame& shadow_frame,      \
                                        const Instruction* inst, uint16_t inst_data,  \
                                        JValue* result)
@@ -727,7 +750,7 @@
 
 // Explicitly instantiate all DoIGetQuick functions.
 #define EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(_field_type)                            \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                 \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE              \
   bool DoIGetQuick<_field_type>(ShadowFrame& shadow_frame, const Instruction* inst,  \
                                 uint16_t inst_data)
 
@@ -738,7 +761,7 @@
 
 // Explicitly instantiate all DoIPutQuick functions.
 #define EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL(_field_type, _transaction_active)        \
-  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE                  \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SOMETIMES_INLINE               \
   bool DoIPutQuick<_field_type, _transaction_active>(const ShadowFrame& shadow_frame, \
                                                      const Instruction* inst,         \
                                                      uint16_t inst_data)
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 74b7c42..9a274f6 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -145,12 +145,14 @@
   const Instruction* inst = Instruction::At(code_item->insns_ + dex_pc);
   uint16_t inst_data;
   const void* const* currentHandlersTable;
+  bool notified_method_entry_event = false;
   UPDATE_HANDLER_TABLE();
   if (LIKELY(dex_pc == 0)) {  // We are entering the method as opposed to deoptimizing..
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
       instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                         shadow_frame.GetMethod(), 0);
+      notified_method_entry_event = true;
     }
   }
 
@@ -232,9 +234,9 @@
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(MOVE_EXCEPTION) {
-    Throwable* exception = self->GetException(NULL);
-    self->ClearException();
+    Throwable* exception = self->GetException(nullptr);
     shadow_frame.SetVRegReference(inst->VRegA_11x(inst_data), exception);
+    self->ClearException();
     ADVANCE(1);
   }
   HANDLE_INSTRUCTION_END();
@@ -255,6 +257,9 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
+    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -271,6 +276,9 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
+    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -288,6 +296,9 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
+    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -304,6 +315,9 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
+    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -328,8 +342,8 @@
         self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
                                  "Ljava/lang/VirtualMachineError;",
                                  "Returning '%s' that is not instance of return type '%s'",
-                                 ClassHelper(obj_result->GetClass()).GetDescriptor(),
-                                 ClassHelper(return_type).GetDescriptor());
+                                 obj_result->GetClass()->GetDescriptor().c_str(),
+                                 return_type->GetDescriptor().c_str());
         HANDLE_PENDING_EXCEPTION();
       }
     }
@@ -338,6 +352,9 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
+    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -519,7 +536,7 @@
       // Don't allow finalizable objects to be allocated during a transaction since these can't be
       // finalized without a started runtime.
       if (transaction_active && obj->GetClass()->IsFinalizable()) {
-        AbortTransaction(self, "Allocating finalizable object in transcation: %s",
+        AbortTransaction(self, "Allocating finalizable object in transaction: %s",
                          PrettyTypeOf(obj).c_str());
         HANDLE_PENDING_EXCEPTION();
       }
@@ -597,7 +614,7 @@
       self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
                                "Ljava/lang/VirtualMachineError;",
                                "Throwing '%s' that is not instance of Throwable",
-                               ClassHelper(exception->GetClass()).GetDescriptor());
+                               exception->GetClass()->GetDescriptor().c_str());
     } else {
       self->SetException(shadow_frame.GetCurrentLocationForThrow(), exception->AsThrowable());
     }
@@ -2384,16 +2401,32 @@
     }
   }
 
-  // Create alternative instruction handlers dedicated to instrumentation.
-#define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v)                              \
-  alt_op_##code: {                                                                                  \
-      instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); \
-      if (UNLIKELY(instrumentation->HasDexPcListeners())) {                                         \
-        instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),    \
-                                         shadow_frame.GetMethod(), dex_pc);                         \
-      }                                                                                             \
-      UPDATE_HANDLER_TABLE();                                                                       \
-      goto *handlersTable[instrumentation::kMainHandlerTable][Instruction::code];                   \
+// Create alternative instruction handlers dedicated to instrumentation.
+// Return instructions must not call Instrumentation::DexPcMovedEvent since they already call
+// Instrumentation::MethodExited. This is to avoid posting debugger events twice for this location.
+// Note: we do not use the kReturn instruction flag here (to test the instruction is a return). The
+// compiler seems to not evaluate "(Instruction::FlagsOf(Instruction::code) & kReturn) != 0" to
+// a constant condition that would remove the "if" statement and make the test free.
+#define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v)                            \
+  alt_op_##code: {                                                                                \
+    if (Instruction::code != Instruction::RETURN_VOID &&                                          \
+        Instruction::code != Instruction::RETURN_VOID_BARRIER &&                                  \
+        Instruction::code != Instruction::RETURN &&                                               \
+        Instruction::code != Instruction::RETURN_WIDE &&                                          \
+        Instruction::code != Instruction::RETURN_OBJECT) {                                        \
+      if (LIKELY(!notified_method_entry_event)) {                                                 \
+        Runtime* runtime = Runtime::Current();                                                    \
+        const instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();  \
+        if (UNLIKELY(instrumentation->HasDexPcListeners())) {                                     \
+          Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_);                 \
+          instrumentation->DexPcMovedEvent(self, this_object, shadow_frame.GetMethod(), dex_pc);  \
+        }                                                                                         \
+      } else {                                                                                    \
+        notified_method_entry_event = false;                                                      \
+      }                                                                                           \
+    }                                                                                             \
+    UPDATE_HANDLER_TABLE();                                                                       \
+    goto *handlersTable[instrumentation::kMainHandlerTable][Instruction::code];                   \
   }
 #include "dex_instruction_list.h"
       DEX_INSTRUCTION_LIST(INSTRUMENTATION_INSTRUCTION_HANDLER)
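
Together with the notified_method_entry_event flag introduced at the top of this function, the rewritten macro enforces one debugger event per location: return opcodes report MethodExited rather than DexPcMoved, and the instruction that immediately follows a MethodEntered notification skips its DexPcMoved. A self-contained toy loop showing the same rule (hypothetical Op enum and listener functions, not the ART instrumentation API):

    #include <cstdio>
    #include <vector>

    enum class Op { kConst, kAdd, kReturn };

    void MethodEnterEvent() { std::puts("MethodEntered"); }
    void DexPcMovedEvent(int pc) { std::printf("DexPcMoved pc=%d\n", pc); }
    void MethodExitEvent() { std::puts("MethodExited"); }

    void Interpret(const std::vector<Op>& code) {
      MethodEnterEvent();
      bool notified_method_entry_event = true;
      for (int pc = 0; pc < static_cast<int>(code.size()); ++pc) {
        if (code[pc] == Op::kReturn) {
          // Returns post MethodExited instead of DexPcMoved, so the debugger
          // never sees two events for the same location.
          MethodExitEvent();
          return;
        }
        if (notified_method_entry_event) {
          // The entry event already covered this location.
          notified_method_entry_event = false;
        } else {
          DexPcMovedEvent(pc);
        }
        // ... execute code[pc] ...
      }
    }

    int main() {
      Interpret({Op::kConst, Op::kAdd, Op::kReturn});
      return 0;
    }
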
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 0da1445..68759ad 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -21,7 +21,7 @@
 
 #define HANDLE_PENDING_EXCEPTION()                                                              \
   do {                                                                                          \
-    CHECK(self->IsExceptionPending());                                                          \
+    DCHECK(self->IsExceptionPending());                                                         \
     if (UNLIKELY(self->TestAllFlags())) {                                                       \
       CheckSuspend(self);                                                                       \
     }                                                                                           \
@@ -48,11 +48,20 @@
   } while (false)
 
 // Code to run before each dex instruction.
-#define PREAMBLE()
+#define PREAMBLE()                                                                              \
+  do {                                                                                          \
+    DCHECK(!inst->IsReturn());                                                                  \
+    if (UNLIKELY(notified_method_entry_event)) {                                                \
+      notified_method_entry_event = false;                                                      \
+    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {                                \
+      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),  \
+                                       shadow_frame.GetMethod(), dex_pc);                       \
+    }                                                                                           \
+  } while (false)
 
 template<bool do_access_check, bool transaction_active>
 JValue ExecuteSwitchImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item,
-                                ShadowFrame& shadow_frame, JValue result_register) {
+                         ShadowFrame& shadow_frame, JValue result_register) {
   bool do_assignability_check = do_access_check;
   if (UNLIKELY(!shadow_frame.HasReferenceArray())) {
     LOG(FATAL) << "Invalid shadow frame for interpreter use";
@@ -61,11 +70,13 @@
   self->VerifyStack();
 
   uint32_t dex_pc = shadow_frame.GetDexPC();
+  bool notified_method_entry_event = false;
   const instrumentation::Instrumentation* const instrumentation = Runtime::Current()->GetInstrumentation();
   if (LIKELY(dex_pc == 0)) {  // We are entering the method as opposed to deoptimizing..
     if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
       instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                         shadow_frame.GetMethod(), 0);
+      notified_method_entry_event = true;
     }
   }
   const uint16_t* const insns = code_item->insns_;
@@ -74,10 +85,6 @@
   while (true) {
     dex_pc = inst->GetDexPc(insns);
     shadow_frame.SetDexPC(dex_pc);
-    if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                       shadow_frame.GetMethod(), dex_pc);
-    }
     TraceExecution(shadow_frame, inst, dex_pc, mh);
     inst_data = inst->Fetch16(0);
     switch (inst->Opcode(inst_data)) {
@@ -156,14 +163,13 @@
         break;
       case Instruction::MOVE_EXCEPTION: {
         PREAMBLE();
-        Throwable* exception = self->GetException(NULL);
-        self->ClearException();
+        Throwable* exception = self->GetException(nullptr);
         shadow_frame.SetVRegReference(inst->VRegA_11x(inst_data), exception);
+        self->ClearException();
         inst = inst->Next_1xx();
         break;
       }
       case Instruction::RETURN_VOID: {
-        PREAMBLE();
         JValue result;
         if (do_access_check) {
           // If access checks are required then the dex-to-dex compiler and analysis of
@@ -178,11 +184,13 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
       case Instruction::RETURN_VOID_BARRIER: {
-        PREAMBLE();
         QuasiAtomic::MembarStoreLoad();
         JValue result;
         if (UNLIKELY(self->TestAllFlags())) {
@@ -192,11 +200,13 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
       case Instruction::RETURN: {
-        PREAMBLE();
         JValue result;
         result.SetJ(0);
         result.SetI(shadow_frame.GetVReg(inst->VRegA_11x(inst_data)));
@@ -207,11 +217,13 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
       case Instruction::RETURN_WIDE: {
-        PREAMBLE();
         JValue result;
         result.SetJ(shadow_frame.GetVRegLong(inst->VRegA_11x(inst_data)));
         if (UNLIKELY(self->TestAllFlags())) {
@@ -221,11 +233,13 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
       case Instruction::RETURN_OBJECT: {
-        PREAMBLE();
         JValue result;
         if (UNLIKELY(self->TestAllFlags())) {
           CheckSuspend(self);
@@ -244,8 +258,8 @@
             self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
                                      "Ljava/lang/VirtualMachineError;",
                                      "Returning '%s' that is not instance of return type '%s'",
-                                     ClassHelper(obj_result->GetClass()).GetDescriptor(),
-                                     ClassHelper(return_type).GetDescriptor());
+                                     obj_result->GetClass()->GetDescriptor().c_str(),
+                                     return_type->GetDescriptor().c_str());
             HANDLE_PENDING_EXCEPTION();
           }
         }
@@ -253,6 +267,9 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
@@ -432,7 +449,7 @@
           // Don't allow finalizable objects to be allocated during a transaction since these can't
           // be finalized without a started runtime.
           if (transaction_active && obj->GetClass()->IsFinalizable()) {
-            AbortTransaction(self, "Allocating finalizable object in transcation: %s",
+            AbortTransaction(self, "Allocating finalizable object in transaction: %s",
                              PrettyTypeOf(obj).c_str());
             HANDLE_PENDING_EXCEPTION();
             break;
@@ -511,7 +528,7 @@
           self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
                                    "Ljava/lang/VirtualMachineError;",
                                    "Throwing '%s' that is not instance of Throwable",
-                                   ClassHelper(exception->GetClass()).GetDescriptor());
+                                   exception->GetClass()->GetDescriptor().c_str());
         } else {
           self->SetException(shadow_frame.GetCurrentLocationForThrow(), exception->AsThrowable());
         }
diff --git a/runtime/jdwp/jdwp_adb.cc b/runtime/jdwp/jdwp_adb.cc
index cbf35be..fe91bb6 100644
--- a/runtime/jdwp/jdwp_adb.cc
+++ b/runtime/jdwp/jdwp_adb.cc
@@ -362,7 +362,7 @@
       }
 
       if (wake_pipe_[0] >= 0 && FD_ISSET(wake_pipe_[0], &readfds)) {
-        LOG(DEBUG) << "Got wake-up signal, bailing out of select";
+        VLOG(jdwp) << "Got wake-up signal, bailing out of select";
         goto fail;
       }
       if (control_sock_ >= 0 && FD_ISSET(control_sock_, &readfds)) {
@@ -385,7 +385,7 @@
           if (errno != EINTR) {
             goto fail;
           }
-          LOG(DEBUG) << "+++ EINTR hit";
+          VLOG(jdwp) << "+++ EINTR hit";
           return true;
         } else if (readCount == 0) {
           /* EOF hit -- far end went away */
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 223b7a1..cb2c420 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -136,6 +136,28 @@
     }
 }
 
+uint32_t GetInstrumentationEventFor(JdwpEventKind eventKind) {
+  switch (eventKind) {
+    case EK_BREAKPOINT:
+    case EK_SINGLE_STEP:
+      return instrumentation::Instrumentation::kDexPcMoved;
+    case EK_EXCEPTION:
+    case EK_EXCEPTION_CATCH:
+      return instrumentation::Instrumentation::kExceptionCaught;
+    case EK_METHOD_ENTRY:
+      return instrumentation::Instrumentation::kMethodEntered;
+    case EK_METHOD_EXIT:
+    case EK_METHOD_EXIT_WITH_RETURN_VALUE:
+      return instrumentation::Instrumentation::kMethodExited;
+    case EK_FIELD_ACCESS:
+      return instrumentation::Instrumentation::kFieldRead;
+    case EK_FIELD_MODIFICATION:
+      return instrumentation::Instrumentation::kFieldWritten;
+    default:
+      return 0;
+  }
+}
+
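
GetInstrumentationEventFor maps a JDWP event kind to the instrumentation listener bit it needs, returning 0 for kinds that need none; RegisterEvent/UnregisterEvent below then turn that bit into a kRegisterForEvent/kUnregisterForEvent request. A compact standalone sketch of the mapping-drives-registration idea, using made-up enum values rather than the real JDWP or Instrumentation constants:

    #include <cstdint>
    #include <cstdio>

    // Made-up stand-ins for the real constants.
    enum JdwpEventKind { EK_BREAKPOINT, EK_METHOD_ENTRY, EK_VM_START };
    enum InstrumentationEvent : uint32_t {
      kDexPcMoved    = 1u << 0,
      kMethodEntered = 1u << 1,
    };

    uint32_t GetInstrumentationEventFor(JdwpEventKind kind) {
      switch (kind) {
        case EK_BREAKPOINT:   return kDexPcMoved;
        case EK_METHOD_ENTRY: return kMethodEntered;
        default:              return 0;  // this kind needs no listener
      }
    }

    int main() {
      JdwpEventKind kinds[] = {EK_BREAKPOINT, EK_METHOD_ENTRY, EK_VM_START};
      for (JdwpEventKind kind : kinds) {
        uint32_t event = GetInstrumentationEventFor(kind);
        if (event != 0) {
          // Register on event add; the matching unregister happens on remove.
          std::printf("register listener mask 0x%x\n", static_cast<unsigned>(event));
        }
      }
      return 0;
    }
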
 /*
  * Add an event to the list.  Ordering is not important.
  *
@@ -148,30 +170,40 @@
   CHECK(pEvent->prev == NULL);
   CHECK(pEvent->next == NULL);
 
-  /*
-   * If one or more "break"-type mods are used, register them with
-   * the interpreter.
-   */
-  DeoptimizationRequest req;
-  for (int i = 0; i < pEvent->modCount; i++) {
-    const JdwpEventMod* pMod = &pEvent->mods[i];
-    if (pMod->modKind == MK_LOCATION_ONLY) {
-      /* should only be for Breakpoint, Step, and Exception */
-      Dbg::WatchLocation(&pMod->locationOnly.loc, &req);
-    } else if (pMod->modKind == MK_STEP) {
-      /* should only be for EK_SINGLE_STEP; should only be one */
-      JdwpStepSize size = static_cast<JdwpStepSize>(pMod->step.size);
-      JdwpStepDepth depth = static_cast<JdwpStepDepth>(pMod->step.depth);
-      JdwpError status = Dbg::ConfigureStep(pMod->step.threadId, size, depth);
-      if (status != ERR_NONE) {
-        return status;
+  {
+    /*
+     * If one or more "break"-type mods are used, register them with
+     * the interpreter.
+     */
+    DeoptimizationRequest req;
+    for (int i = 0; i < pEvent->modCount; i++) {
+      const JdwpEventMod* pMod = &pEvent->mods[i];
+      if (pMod->modKind == MK_LOCATION_ONLY) {
+        /* should only be for Breakpoint, Step, and Exception */
+        Dbg::WatchLocation(&pMod->locationOnly.loc, &req);
+      } else if (pMod->modKind == MK_STEP) {
+        /* should only be for EK_SINGLE_STEP; should only be one */
+        JdwpStepSize size = static_cast<JdwpStepSize>(pMod->step.size);
+        JdwpStepDepth depth = static_cast<JdwpStepDepth>(pMod->step.depth);
+        JdwpError status = Dbg::ConfigureStep(pMod->step.threadId, size, depth);
+        if (status != ERR_NONE) {
+          return status;
+        }
       }
     }
+    if (NeedsFullDeoptimization(pEvent->eventKind)) {
+      CHECK_EQ(req.kind, DeoptimizationRequest::kNothing);
+      CHECK(req.method == nullptr);
+      req.kind = DeoptimizationRequest::kFullDeoptimization;
+    }
+    Dbg::RequestDeoptimization(req);
   }
-  if (NeedsFullDeoptimization(pEvent->eventKind)) {
-    CHECK_EQ(req.kind, DeoptimizationRequest::kNothing);
-    CHECK(req.method == nullptr);
-    req.kind = DeoptimizationRequest::kFullDeoptimization;
+  uint32_t instrumentation_event = GetInstrumentationEventFor(pEvent->eventKind);
+  if (instrumentation_event != 0) {
+    DeoptimizationRequest req;
+    req.kind = DeoptimizationRequest::kRegisterForEvent;
+    req.instrumentation_event = instrumentation_event;
+    Dbg::RequestDeoptimization(req);
   }
 
   {
@@ -187,9 +219,6 @@
     ++event_list_size_;
   }
 
-  // TODO we can do better job here since we should process only one request: the one we just
-  // created.
-  Dbg::RequestDeoptimization(req);
   Dbg::ManageDeoptimization();
 
   return ERR_NONE;
@@ -219,40 +248,48 @@
   }
   pEvent->prev = NULL;
 
-  /*
-   * Unhook us from the interpreter, if necessary.
-   */
-  DeoptimizationRequest req;
-  for (int i = 0; i < pEvent->modCount; i++) {
-    JdwpEventMod* pMod = &pEvent->mods[i];
-    if (pMod->modKind == MK_LOCATION_ONLY) {
-      /* should only be for Breakpoint, Step, and Exception */
-      Dbg::UnwatchLocation(&pMod->locationOnly.loc, &req);
+  {
+    /*
+     * Unhook us from the interpreter, if necessary.
+     */
+    DeoptimizationRequest req;
+    for (int i = 0; i < pEvent->modCount; i++) {
+      JdwpEventMod* pMod = &pEvent->mods[i];
+      if (pMod->modKind == MK_LOCATION_ONLY) {
+        /* should only be for Breakpoint, Step, and Exception */
+        Dbg::UnwatchLocation(&pMod->locationOnly.loc, &req);
+      }
+      if (pMod->modKind == MK_STEP) {
+        /* should only be for EK_SINGLE_STEP; should only be one */
+        Dbg::UnconfigureStep(pMod->step.threadId);
+      }
     }
-    if (pMod->modKind == MK_STEP) {
-      /* should only be for EK_SINGLE_STEP; should only be one */
-      Dbg::UnconfigureStep(pMod->step.threadId);
+    if (pEvent->eventKind == EK_SINGLE_STEP) {
+      // Special case for single-steps where we want to avoid the slow pattern deoptimize/undeoptimize
+      // loop between each single-step. In an IDE, this would happen each time the user clicks the
+      // "single-step" button. Here we delay the full undeoptimization until the next resume
+      // (VM.Resume or ThreadReference.Resume) or the end of the debugging session (VM.Dispose or
+      // runtime shutdown).
+      // Therefore, in a single-stepping sequence, only the first single-step will trigger a full
+      // deoptimization and only the last single-step will trigger a full undeoptimization.
+      Dbg::DelayFullUndeoptimization();
+    } else if (NeedsFullDeoptimization(pEvent->eventKind)) {
+      CHECK_EQ(req.kind, DeoptimizationRequest::kNothing);
+      CHECK(req.method == nullptr);
+      req.kind = DeoptimizationRequest::kFullUndeoptimization;
     }
+    Dbg::RequestDeoptimization(req);
   }
-  if (pEvent->eventKind == EK_SINGLE_STEP) {
-    // Special case for single-steps where we want to avoid the slow pattern deoptimize/undeoptimize
-    // loop between each single-step. In a IDE, this would happens each time the user click on the
-    // "single-step" button. Here we delay the full undeoptimization to the next resume
-    // (VM.Resume or ThreadReference.Resume) or the end of the debugging session (VM.Dispose or
-    // runtime shutdown).
-    // Therefore, in a singles-stepping sequence, only the first single-step will trigger a full
-    // deoptimization and only the last single-step will trigger a full undeoptimization.
-    Dbg::DelayFullUndeoptimization();
-  } else if (NeedsFullDeoptimization(pEvent->eventKind)) {
-    CHECK_EQ(req.kind, DeoptimizationRequest::kNothing);
-    CHECK(req.method == nullptr);
-    req.kind = DeoptimizationRequest::kFullUndeoptimization;
+  uint32_t instrumentation_event = GetInstrumentationEventFor(pEvent->eventKind);
+  if (instrumentation_event != 0) {
+    DeoptimizationRequest req;
+    req.kind = DeoptimizationRequest::kUnregisterForEvent;
+    req.instrumentation_event = instrumentation_event;
+    Dbg::RequestDeoptimization(req);
   }
 
   --event_list_size_;
   CHECK(event_list_size_ != 0 || event_list_ == NULL);
-
-  Dbg::RequestDeoptimization(req);
 }
 
 /*
@@ -280,7 +317,7 @@
   if (found) {
     Dbg::ManageDeoptimization();
   } else {
-    LOG(DEBUG) << StringPrintf("Odd: no match when removing event reqId=0x%04x", requestId);
+    LOG(WARNING) << StringPrintf("Odd: no match when removing event reqId=0x%04x", requestId);
   }
 }
 
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index 4843c2b..05bfe0d 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -17,7 +17,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
-
+#include <memory>
 #include <string>
 
 #include "atomic.h"
@@ -32,7 +32,6 @@
 #include "jdwp/jdwp_priv.h"
 #include "runtime.h"
 #include "thread-inl.h"
-#include "UniquePtr.h"
 
 namespace art {
 
@@ -107,8 +106,8 @@
                              Dbg::GetMethodName(method_id).c_str());
   VLOG(jdwp) << StringPrintf("        %d args:", arg_count);
 
-  UniquePtr<JdwpTag[]> argTypes(arg_count > 0 ? new JdwpTag[arg_count] : NULL);
-  UniquePtr<uint64_t[]> argValues(arg_count > 0 ? new uint64_t[arg_count] : NULL);
+  std::unique_ptr<JdwpTag[]> argTypes(arg_count > 0 ? new JdwpTag[arg_count] : NULL);
+  std::unique_ptr<uint64_t[]> argValues(arg_count > 0 ? new uint64_t[arg_count] : NULL);
   for (int32_t i = 0; i < arg_count; ++i) {
     argTypes[i] = request.ReadTag();
     size_t width = Dbg::GetTagWidth(argTypes[i]);
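
This file's UniquePtr uses become std::unique_ptr; the array form keeps the "nullptr when the count is zero" pattern and releases the buffer with delete[] automatically when it goes out of scope. A minimal sketch of the idiom with placeholder data:

    #include <cstdint>
    #include <cstdio>
    #include <memory>

    int main() {
      int32_t arg_count = 3;
      // Array form: unique_ptr<T[]> selects delete[]; zero arguments means no allocation.
      std::unique_ptr<uint64_t[]> arg_values(arg_count > 0 ? new uint64_t[arg_count] : nullptr);
      for (int32_t i = 0; i < arg_count; ++i) {
        arg_values[i] = static_cast<uint64_t>(i) * 10;
      }
      std::printf("last value: %llu\n",
                  static_cast<unsigned long long>(arg_values[arg_count - 1]));
      return 0;  // delete[] runs automatically here
    }
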
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index f480256..64e9f37 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -235,7 +235,7 @@
 JdwpState* JdwpState::Create(const JdwpOptions* options) {
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertNotHeld(self);
-  UniquePtr<JdwpState> state(new JdwpState(options));
+  std::unique_ptr<JdwpState> state(new JdwpState(options));
   switch (options->transport) {
     case kJdwpTransportSocket:
       InitSocketTransport(state.get(), options);
@@ -573,7 +573,7 @@
  */
 int64_t JdwpState::LastDebuggerActivity() {
   if (!Dbg::IsDebuggerActive()) {
-    LOG(DEBUG) << "no active debugger";
+    LOG(WARNING) << "no active debugger";
     return -1;
   }
 
diff --git a/runtime/jdwp/jdwp_socket.cc b/runtime/jdwp/jdwp_socket.cc
index 3f5546e..4a80957 100644
--- a/runtime/jdwp/jdwp_socket.cc
+++ b/runtime/jdwp/jdwp_socket.cc
@@ -416,7 +416,7 @@
         if (listenSock >= 0) {
           LOG(ERROR) << "Exit wake set, but not exiting?";
         } else {
-          LOG(DEBUG) << "Got wake-up signal, bailing out of select";
+          VLOG(jdwp) << "Got wake-up signal, bailing out of select";
         }
         goto fail;
       }
@@ -442,7 +442,7 @@
           if (errno != EINTR) {
             goto fail;
           }
-          LOG(DEBUG) << "+++ EINTR hit";
+          VLOG(jdwp) << "+++ EINTR hit";
           return true;
         } else if (readCount == 0) {
           /* EOF hit -- far end went away */
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index e6a35d0..b51e1d5 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -19,6 +19,7 @@
 #include <dlfcn.h>
 
 #include <cstdarg>
+#include <memory>
 #include <utility>
 #include <vector>
 
@@ -29,6 +30,7 @@
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
 #include "gc/accounting/card_table-inl.h"
+#include "indirect_reference_table-inl.h"
 #include "interpreter/interpreter.h"
 #include "jni.h"
 #include "mirror/art_field-inl.h"
@@ -48,7 +50,6 @@
 #include "ScopedLocalRef.h"
 #include "thread.h"
 #include "utf.h"
-#include "UniquePtr.h"
 #include "well_known_classes.h"
 
 namespace art {
@@ -108,7 +109,7 @@
   ThrowLocation throw_location = soa.Self()->GetCurrentLocationForThrow();
   soa.Self()->ThrowNewExceptionF(throw_location, "Ljava/lang/NoSuchMethodError;",
                                  "no %s method \"%s.%s%s\"",
-                                 kind, ClassHelper(c).GetDescriptor(), name, sig);
+                                 kind, c->GetDescriptor().c_str(), name, sig);
 }
 
 static mirror::Class* EnsureInitialized(Thread* self, mirror::Class* klass)
@@ -116,11 +117,12 @@
   if (LIKELY(klass->IsInitialized())) {
     return klass;
   }
-  SirtRef<mirror::Class> sirt_klass(self, klass);
-  if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(sirt_klass, true, true)) {
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Class> h_klass(hs.NewHandle(klass));
+  if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(h_klass, true, true)) {
     return nullptr;
   }
-  return sirt_klass.get();
+  return h_klass.Get();
 }
 
 static jmethodID FindMethodID(ScopedObjectAccess& soa, jclass jni_class,
@@ -179,16 +181,17 @@
 static jfieldID FindFieldID(const ScopedObjectAccess& soa, jclass jni_class, const char* name,
                             const char* sig, bool is_static)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  SirtRef<mirror::Class> c(soa.Self(), EnsureInitialized(soa.Self(),
-                                                         soa.Decode<mirror::Class*>(jni_class)));
-  if (c.get() == nullptr) {
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::Class> c(
+      hs.NewHandle(EnsureInitialized(soa.Self(), soa.Decode<mirror::Class*>(jni_class))));
+  if (c.Get() == nullptr) {
     return nullptr;
   }
   mirror::ArtField* field = nullptr;
   mirror::Class* field_type;
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   if (sig[1] != '\0') {
-    SirtRef<mirror::ClassLoader> class_loader(soa.Self(), c->GetClassLoader());
+    Handle<mirror::ClassLoader> class_loader(hs.NewHandle(c->GetClassLoader()));
     field_type = class_linker->FindClass(soa.Self(), sig, class_loader);
   } else {
     field_type = class_linker->FindPrimitiveClass(*sig);
@@ -197,25 +200,27 @@
     // Failed to find type from the signature of the field.
     DCHECK(soa.Self()->IsExceptionPending());
     ThrowLocation throw_location;
-    SirtRef<mirror::Throwable> cause(soa.Self(), soa.Self()->GetException(&throw_location));
+    StackHandleScope<1> hs(soa.Self());
+    Handle<mirror::Throwable> cause(hs.NewHandle(soa.Self()->GetException(&throw_location)));
     soa.Self()->ClearException();
     soa.Self()->ThrowNewExceptionF(throw_location, "Ljava/lang/NoSuchFieldError;",
                                    "no type \"%s\" found and so no field \"%s\" "
                                    "could be found in class \"%s\" or its superclasses", sig, name,
-                                   ClassHelper(c.get()).GetDescriptor());
-    soa.Self()->GetException(nullptr)->SetCause(cause.get());
+                                   c->GetDescriptor().c_str());
+    soa.Self()->GetException(nullptr)->SetCause(cause.Get());
     return nullptr;
   }
   if (is_static) {
-    field = c->FindStaticField(name, ClassHelper(field_type).GetDescriptor());
+    field = mirror::Class::FindStaticField(soa.Self(), c, name,
+                                           field_type->GetDescriptor().c_str());
   } else {
-    field = c->FindInstanceField(name, ClassHelper(field_type).GetDescriptor());
+    field = c->FindInstanceField(name, field_type->GetDescriptor().c_str());
   }
   if (field == nullptr) {
     ThrowLocation throw_location = soa.Self()->GetCurrentLocationForThrow();
     soa.Self()->ThrowNewExceptionF(throw_location, "Ljava/lang/NoSuchFieldError;",
                                    "no \"%s\" field \"%s\" in class \"%s\" or its superclasses",
-                                   sig, name, ClassHelper(c.get()).GetDescriptor());
+                                   sig, name, c->GetDescriptor().c_str());
     return nullptr;
   }
   return soa.EncodeField(field);
@@ -514,16 +519,28 @@
   SafeMap<std::string, SharedLibrary*> libraries_;
 };
 
-#define CHECK_NON_NULL_ARGUMENT(value) CHECK_NON_NULL_ARGUMENT_FN_NAME(__FUNCTION__, value)
+#define CHECK_NON_NULL_ARGUMENT(value) \
+    CHECK_NON_NULL_ARGUMENT_FN_NAME(__FUNCTION__, value, nullptr)
 
-#define CHECK_NON_NULL_ARGUMENT_FN_NAME(name, value) \
+#define CHECK_NON_NULL_ARGUMENT_RETURN_VOID(value) \
+    CHECK_NON_NULL_ARGUMENT_FN_NAME(__FUNCTION__, value, )
+
+#define CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(value) \
+    CHECK_NON_NULL_ARGUMENT_FN_NAME(__FUNCTION__, value, 0)
+
+#define CHECK_NON_NULL_ARGUMENT_RETURN(value, return_val) \
+    CHECK_NON_NULL_ARGUMENT_FN_NAME(__FUNCTION__, value, return_val)
+
+#define CHECK_NON_NULL_ARGUMENT_FN_NAME(name, value, return_val) \
   if (UNLIKELY(value == nullptr)) { \
     JniAbortF(name, #value " == null"); \
+    return return_val; \
   }
 
 #define CHECK_NON_NULL_MEMCPY_ARGUMENT(length, value) \
   if (UNLIKELY(length != 0 && value == nullptr)) { \
     JniAbortF(__FUNCTION__, #value " == null"); \
+    return; \
   }
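
The macro family now takes the value to return after JniAbortF, including an intentionally empty argument for void-returning JNI functions, so execution never falls through a null-argument check when aborts are configured to merely log. A standalone sketch of a macro with a caller-supplied return value (hypothetical JniAbortF and JNI functions, not the real ones):

    #include <cstdio>

    static void JniAbortF(const char* fn, const char* msg) {
      std::fprintf(stderr, "JNI error in %s: %s\n", fn, msg);
    }

    #define CHECK_NON_NULL_ARGUMENT_RETURN(value, return_val) \
      if ((value) == nullptr) {                                \
        JniAbortF(__FUNCTION__, #value " == null");            \
        return return_val;                                     \
      }

    int GetVersion(void* env) {
      CHECK_NON_NULL_ARGUMENT_RETURN(env, -1);  // non-void: return an error value
      return 6;
    }

    void DeleteRef(void* env) {
      CHECK_NON_NULL_ARGUMENT_RETURN(env, );  // empty argument: plain "return;" for void
    }

    int main() {
      std::printf("%d\n", GetVersion(nullptr));
      DeleteRef(nullptr);
      return 0;
    }
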
 
 class JNI {
@@ -545,7 +562,8 @@
     ScopedObjectAccess soa(env);
     mirror::Class* c = nullptr;
     if (runtime->IsStarted()) {
-      SirtRef<mirror::ClassLoader> class_loader(soa.Self(), GetClassLoader(soa));
+      StackHandleScope<1> hs(soa.Self());
+      Handle<mirror::ClassLoader> class_loader(hs.NewHandle(GetClassLoader(soa)));
       c = class_linker->FindClass(soa.Self(), descriptor.c_str(), class_loader);
     } else {
       c = class_linker->FindSystemClass(soa.Self(), descriptor.c_str());
@@ -609,8 +627,8 @@
   }
 
   static jboolean IsAssignableFrom(JNIEnv* env, jclass java_class1, jclass java_class2) {
-    CHECK_NON_NULL_ARGUMENT(java_class1);
-    CHECK_NON_NULL_ARGUMENT(java_class2);
+    CHECK_NON_NULL_ARGUMENT_RETURN(java_class1, JNI_FALSE);
+    CHECK_NON_NULL_ARGUMENT_RETURN(java_class2, JNI_FALSE);
     ScopedObjectAccess soa(env);
     mirror::Class* c1 = soa.Decode<mirror::Class*>(java_class1);
     mirror::Class* c2 = soa.Decode<mirror::Class*>(java_class2);
@@ -618,7 +636,7 @@
   }
 
   static jboolean IsInstanceOf(JNIEnv* env, jobject jobj, jclass java_class) {
-    CHECK_NON_NULL_ARGUMENT(java_class);
+    CHECK_NON_NULL_ARGUMENT_RETURN(java_class, JNI_FALSE);
     if (jobj == nullptr) {
       // Note: JNI is different from regular Java instanceof in this respect
       return JNI_TRUE;
@@ -642,7 +660,7 @@
   }
 
   static jint ThrowNew(JNIEnv* env, jclass c, const char* msg) {
-    CHECK_NON_NULL_ARGUMENT(c);
+    CHECK_NON_NULL_ARGUMENT_RETURN(c, JNI_ERR);
     return ThrowNewException(env, c, msg, nullptr);
   }
 
@@ -657,26 +675,28 @@
   static void ExceptionDescribe(JNIEnv* env) {
     ScopedObjectAccess soa(env);
 
-    SirtRef<mirror::Object> old_throw_this_object(soa.Self(), nullptr);
-    SirtRef<mirror::ArtMethod> old_throw_method(soa.Self(), nullptr);
-    SirtRef<mirror::Throwable> old_exception(soa.Self(), nullptr);
+    StackHandleScope<3> hs(soa.Self());
+    // TODO: Use nullptr instead of null handles?
+    auto old_throw_this_object(hs.NewHandle<mirror::Object>(nullptr));
+    auto old_throw_method(hs.NewHandle<mirror::ArtMethod>(nullptr));
+    auto old_exception(hs.NewHandle<mirror::Throwable>(nullptr));
     uint32_t old_throw_dex_pc;
     {
       ThrowLocation old_throw_location;
       mirror::Throwable* old_exception_obj = soa.Self()->GetException(&old_throw_location);
-      old_throw_this_object.reset(old_throw_location.GetThis());
-      old_throw_method.reset(old_throw_location.GetMethod());
-      old_exception.reset(old_exception_obj);
+      old_throw_this_object.Assign(old_throw_location.GetThis());
+      old_throw_method.Assign(old_throw_location.GetMethod());
+      old_exception.Assign(old_exception_obj);
       old_throw_dex_pc = old_throw_location.GetDexPc();
       soa.Self()->ClearException();
     }
     ScopedLocalRef<jthrowable> exception(env,
-                                         soa.AddLocalReference<jthrowable>(old_exception.get()));
+                                         soa.AddLocalReference<jthrowable>(old_exception.Get()));
     ScopedLocalRef<jclass> exception_class(env, env->GetObjectClass(exception.get()));
     jmethodID mid = env->GetMethodID(exception_class.get(), "printStackTrace", "()V");
     if (mid == nullptr) {
       LOG(WARNING) << "JNI WARNING: no printStackTrace()V in "
-                   << PrettyTypeOf(old_exception.get());
+                   << PrettyTypeOf(old_exception.Get());
     } else {
       env->CallVoidMethod(exception.get(), mid);
       if (soa.Self()->IsExceptionPending()) {
@@ -685,10 +705,10 @@
         soa.Self()->ClearException();
       }
     }
-    ThrowLocation gc_safe_throw_location(old_throw_this_object.get(), old_throw_method.get(),
+    ThrowLocation gc_safe_throw_location(old_throw_this_object.Get(), old_throw_method.Get(),
                                          old_throw_dex_pc);
 
-    soa.Self()->SetException(gc_safe_throw_location, old_exception.get());
+    soa.Self()->SetException(gc_safe_throw_location, old_exception.Get());
   }
 
   static jthrowable ExceptionOccurred(JNIEnv* env) {
@@ -702,7 +722,9 @@
   }
 
   static jint PushLocalFrame(JNIEnv* env, jint capacity) {
-    if (EnsureLocalCapacity(env, capacity, "PushLocalFrame") != JNI_OK) {
+    // TODO: SOA may not be necessary but I do it to please lock annotations.
+    ScopedObjectAccess soa(env);
+    if (EnsureLocalCapacity(soa, capacity, "PushLocalFrame") != JNI_OK) {
       return JNI_ERR;
     }
     static_cast<JNIEnvExt*>(env)->PushFrame(capacity);
@@ -717,7 +739,9 @@
   }
 
   static jint EnsureLocalCapacity(JNIEnv* env, jint desired_capacity) {
-    return EnsureLocalCapacity(env, desired_capacity, "EnsureLocalCapacity");
+    // TODO: SOA may not be necessary but I do it to please lock annotations.
+    ScopedObjectAccess soa(env);
+    return EnsureLocalCapacity(soa, desired_capacity, "EnsureLocalCapacity");
   }
 
   static jobject NewGlobalRef(JNIEnv* env, jobject obj) {
@@ -775,6 +799,7 @@
     if (obj == nullptr) {
       return;
     }
+    ScopedObjectAccess soa(env);
     IndirectReferenceTable& locals = reinterpret_cast<JNIEnvExt*>(env)->locals;
 
     uint32_t cookie = reinterpret_cast<JNIEnvExt*>(env)->local_ref_cookie;
@@ -907,8 +932,8 @@
   static jboolean CallBooleanMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
@@ -916,15 +941,15 @@
   }
 
   static jboolean CallBooleanMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args).GetZ();
   }
 
   static jboolean CallBooleanMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeVirtualOrInterfaceWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid,
                                                args).GetZ();
@@ -933,8 +958,8 @@
   static jbyte CallByteMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
@@ -942,15 +967,15 @@
   }
 
   static jbyte CallByteMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args).GetB();
   }
 
   static jbyte CallByteMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeVirtualOrInterfaceWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid,
                                                args).GetB();
@@ -959,8 +984,8 @@
   static jchar CallCharMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
@@ -968,15 +993,15 @@
   }
 
   static jchar CallCharMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args).GetC();
   }
 
   static jchar CallCharMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeVirtualOrInterfaceWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid,
                                                args).GetC();
@@ -985,8 +1010,8 @@
   static jdouble CallDoubleMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
@@ -994,15 +1019,15 @@
   }
 
   static jdouble CallDoubleMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args).GetD();
   }
 
   static jdouble CallDoubleMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeVirtualOrInterfaceWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid,
                                                args).GetD();
@@ -1011,8 +1036,8 @@
   static jfloat CallFloatMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
@@ -1020,15 +1045,15 @@
   }
 
   static jfloat CallFloatMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args).GetF();
   }
 
   static jfloat CallFloatMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeVirtualOrInterfaceWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid,
                                                args).GetF();
@@ -1037,8 +1062,8 @@
   static jint CallIntMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
@@ -1046,15 +1071,15 @@
   }
 
   static jint CallIntMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args).GetI();
   }
 
   static jint CallIntMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeVirtualOrInterfaceWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid,
                                                args).GetI();
@@ -1063,8 +1088,8 @@
   static jlong CallLongMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
@@ -1072,15 +1097,15 @@
   }
 
   static jlong CallLongMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args).GetJ();
   }
 
   static jlong CallLongMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeVirtualOrInterfaceWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid,
                                                args).GetJ();
@@ -1089,8 +1114,8 @@
   static jshort CallShortMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
@@ -1098,15 +1123,15 @@
   }
 
   static jshort CallShortMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args).GetS();
   }
 
   static jshort CallShortMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeVirtualOrInterfaceWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid,
                                                args).GetS();
@@ -1115,23 +1140,23 @@
   static void CallVoidMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(mid);
     ScopedObjectAccess soa(env);
     InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap);
     va_end(ap);
   }
 
   static void CallVoidMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(mid);
     ScopedObjectAccess soa(env);
     InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args);
   }
 
   static void CallVoidMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(mid);
     ScopedObjectAccess soa(env);
     InvokeVirtualOrInterfaceWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid, args);
   }
@@ -1170,8 +1195,8 @@
                                               ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
@@ -1180,16 +1205,16 @@
 
   static jboolean CallNonvirtualBooleanMethodV(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                                va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithVarArgs(soa, obj, mid, args).GetZ();
   }
 
   static jboolean CallNonvirtualBooleanMethodA(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                                jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid, args).GetZ();
   }
@@ -1197,8 +1222,8 @@
   static jbyte CallNonvirtualByteMethod(JNIEnv* env, jobject obj, jclass, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
@@ -1207,16 +1232,16 @@
 
   static jbyte CallNonvirtualByteMethodV(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                          va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithVarArgs(soa, obj, mid, args).GetB();
   }
 
   static jbyte CallNonvirtualByteMethodA(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                          jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid, args).GetB();
   }
@@ -1224,8 +1249,8 @@
   static jchar CallNonvirtualCharMethod(JNIEnv* env, jobject obj, jclass, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
@@ -1234,16 +1259,16 @@
 
   static jchar CallNonvirtualCharMethodV(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                          va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithVarArgs(soa, obj, mid, args).GetC();
   }
 
   static jchar CallNonvirtualCharMethodA(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                          jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid, args).GetC();
   }
@@ -1251,8 +1276,8 @@
   static jshort CallNonvirtualShortMethod(JNIEnv* env, jobject obj, jclass, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
@@ -1261,16 +1286,16 @@
 
   static jshort CallNonvirtualShortMethodV(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                            va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithVarArgs(soa, obj, mid, args).GetS();
   }
 
   static jshort CallNonvirtualShortMethodA(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                            jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid, args).GetS();
   }
@@ -1278,8 +1303,8 @@
   static jint CallNonvirtualIntMethod(JNIEnv* env, jobject obj, jclass, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
@@ -1288,16 +1313,16 @@
 
   static jint CallNonvirtualIntMethodV(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                        va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithVarArgs(soa, obj, mid, args).GetI();
   }
 
   static jint CallNonvirtualIntMethodA(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                        jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid, args).GetI();
   }
@@ -1305,8 +1330,8 @@
   static jlong CallNonvirtualLongMethod(JNIEnv* env, jobject obj, jclass, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
@@ -1315,16 +1340,16 @@
 
   static jlong CallNonvirtualLongMethodV(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                          va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithVarArgs(soa, obj, mid, args).GetJ();
   }
 
   static jlong CallNonvirtualLongMethodA(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                          jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid, args).GetJ();
   }
@@ -1332,8 +1357,8 @@
   static jfloat CallNonvirtualFloatMethod(JNIEnv* env, jobject obj, jclass, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
@@ -1342,16 +1367,16 @@
 
   static jfloat CallNonvirtualFloatMethodV(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                            va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithVarArgs(soa, obj, mid, args).GetF();
   }
 
   static jfloat CallNonvirtualFloatMethodA(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                            jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid, args).GetF();
   }
@@ -1359,8 +1384,8 @@
   static jdouble CallNonvirtualDoubleMethod(JNIEnv* env, jobject obj, jclass, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
@@ -1369,16 +1394,16 @@
 
   static jdouble CallNonvirtualDoubleMethodV(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                              va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithVarArgs(soa, obj, mid, args).GetD();
   }
 
   static jdouble CallNonvirtualDoubleMethodA(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                              jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid, args).GetD();
   }
@@ -1386,8 +1411,8 @@
   static void CallNonvirtualVoidMethod(JNIEnv* env, jobject obj, jclass, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(mid);
     ScopedObjectAccess soa(env);
     InvokeWithVarArgs(soa, obj, mid, ap);
     va_end(ap);
@@ -1395,16 +1420,16 @@
 
   static void CallNonvirtualVoidMethodV(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                         va_list args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(mid);
     ScopedObjectAccess soa(env);
     InvokeWithVarArgs(soa, obj, mid, args);
   }
 
   static void CallNonvirtualVoidMethodA(JNIEnv* env, jobject obj, jclass, jmethodID mid,
                                         jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(obj);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(obj);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(mid);
     ScopedObjectAccess soa(env);
     InvokeWithJValues(soa, soa.Decode<mirror::Object*>(obj), mid, args);
   }
@@ -1443,8 +1468,8 @@
   }
 
   static void SetObjectField(JNIEnv* env, jobject java_object, jfieldID fid, jobject java_value) {
-    CHECK_NON_NULL_ARGUMENT(java_object);
-    CHECK_NON_NULL_ARGUMENT(fid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_object);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(fid);
     ScopedObjectAccess soa(env);
     mirror::Object* o = soa.Decode<mirror::Object*>(java_object);
     mirror::Object* v = soa.Decode<mirror::Object*>(java_value);
@@ -1453,7 +1478,7 @@
   }
 
   static void SetStaticObjectField(JNIEnv* env, jclass, jfieldID fid, jobject java_value) {
-    CHECK_NON_NULL_ARGUMENT(fid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(fid);
     ScopedObjectAccess soa(env);
     mirror::Object* v = soa.Decode<mirror::Object*>(java_value);
     mirror::ArtField* f = soa.DecodeField(fid);
@@ -1461,29 +1486,29 @@
   }
 
 #define GET_PRIMITIVE_FIELD(fn, instance) \
-  CHECK_NON_NULL_ARGUMENT(instance); \
-  CHECK_NON_NULL_ARGUMENT(fid); \
+  CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(instance); \
+  CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(fid); \
   ScopedObjectAccess soa(env); \
   mirror::Object* o = soa.Decode<mirror::Object*>(instance); \
   mirror::ArtField* f = soa.DecodeField(fid); \
   return f->Get ##fn (o)
 
 #define GET_STATIC_PRIMITIVE_FIELD(fn) \
-  CHECK_NON_NULL_ARGUMENT(fid); \
+  CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(fid); \
   ScopedObjectAccess soa(env); \
   mirror::ArtField* f = soa.DecodeField(fid); \
   return f->Get ##fn (f->GetDeclaringClass())
 
 #define SET_PRIMITIVE_FIELD(fn, instance, value) \
-  CHECK_NON_NULL_ARGUMENT(instance); \
-  CHECK_NON_NULL_ARGUMENT(fid); \
+  CHECK_NON_NULL_ARGUMENT_RETURN_VOID(instance); \
+  CHECK_NON_NULL_ARGUMENT_RETURN_VOID(fid); \
   ScopedObjectAccess soa(env); \
   mirror::Object* o = soa.Decode<mirror::Object*>(instance); \
   mirror::ArtField* f = soa.DecodeField(fid); \
   f->Set ##fn <false>(o, value)
 
 #define SET_STATIC_PRIMITIVE_FIELD(fn, value) \
-  CHECK_NON_NULL_ARGUMENT(fid); \
+  CHECK_NON_NULL_ARGUMENT_RETURN_VOID(fid); \
   ScopedObjectAccess soa(env); \
   mirror::ArtField* f = soa.DecodeField(fid); \
   f->Set ##fn <false>(f->GetDeclaringClass(), value)
@@ -1644,7 +1669,7 @@
   static jboolean CallStaticBooleanMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeWithVarArgs(soa, nullptr, mid, ap));
     va_end(ap);
@@ -1652,13 +1677,13 @@
   }
 
   static jboolean CallStaticBooleanMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithVarArgs(soa, nullptr, mid, args).GetZ();
   }
 
   static jboolean CallStaticBooleanMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithJValues(soa, nullptr, mid, args).GetZ();
   }
@@ -1666,7 +1691,7 @@
   static jbyte CallStaticByteMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeWithVarArgs(soa, nullptr, mid, ap));
     va_end(ap);
@@ -1674,13 +1699,13 @@
   }
 
   static jbyte CallStaticByteMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithVarArgs(soa, nullptr, mid, args).GetB();
   }
 
   static jbyte CallStaticByteMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithJValues(soa, nullptr, mid, args).GetB();
   }
@@ -1688,7 +1713,7 @@
   static jchar CallStaticCharMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeWithVarArgs(soa, nullptr, mid, ap));
     va_end(ap);
@@ -1696,13 +1721,13 @@
   }
 
   static jchar CallStaticCharMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithVarArgs(soa, nullptr, mid, args).GetC();
   }
 
   static jchar CallStaticCharMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithJValues(soa, nullptr, mid, args).GetC();
   }
@@ -1710,7 +1735,7 @@
   static jshort CallStaticShortMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeWithVarArgs(soa, nullptr, mid, ap));
     va_end(ap);
@@ -1718,13 +1743,13 @@
   }
 
   static jshort CallStaticShortMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithVarArgs(soa, nullptr, mid, args).GetS();
   }
 
   static jshort CallStaticShortMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithJValues(soa, nullptr, mid, args).GetS();
   }
@@ -1732,7 +1757,7 @@
   static jint CallStaticIntMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeWithVarArgs(soa, nullptr, mid, ap));
     va_end(ap);
@@ -1740,13 +1765,13 @@
   }
 
   static jint CallStaticIntMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithVarArgs(soa, nullptr, mid, args).GetI();
   }
 
   static jint CallStaticIntMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithJValues(soa, nullptr, mid, args).GetI();
   }
@@ -1754,7 +1779,7 @@
   static jlong CallStaticLongMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeWithVarArgs(soa, nullptr, mid, ap));
     va_end(ap);
@@ -1762,13 +1787,13 @@
   }
 
   static jlong CallStaticLongMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithVarArgs(soa, nullptr, mid, args).GetJ();
   }
 
   static jlong CallStaticLongMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithJValues(soa, nullptr, mid, args).GetJ();
   }
@@ -1776,7 +1801,7 @@
   static jfloat CallStaticFloatMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeWithVarArgs(soa, nullptr, mid, ap));
     va_end(ap);
@@ -1784,13 +1809,13 @@
   }
 
   static jfloat CallStaticFloatMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithVarArgs(soa, nullptr, mid, args).GetF();
   }
 
   static jfloat CallStaticFloatMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithJValues(soa, nullptr, mid, args).GetF();
   }
@@ -1798,7 +1823,7 @@
   static jdouble CallStaticDoubleMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     JValue result(InvokeWithVarArgs(soa, nullptr, mid, ap));
     va_end(ap);
@@ -1806,13 +1831,13 @@
   }
 
   static jdouble CallStaticDoubleMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithVarArgs(soa, nullptr, mid, args).GetD();
   }
 
   static jdouble CallStaticDoubleMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(mid);
     ScopedObjectAccess soa(env);
     return InvokeWithJValues(soa, nullptr, mid, args).GetD();
   }
@@ -1820,20 +1845,20 @@
   static void CallStaticVoidMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
     va_list ap;
     va_start(ap, mid);
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(mid);
     ScopedObjectAccess soa(env);
     InvokeWithVarArgs(soa, nullptr, mid, ap);
     va_end(ap);
   }
 
   static void CallStaticVoidMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(mid);
     ScopedObjectAccess soa(env);
     InvokeWithVarArgs(soa, nullptr, mid, args);
   }
 
   static void CallStaticVoidMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    CHECK_NON_NULL_ARGUMENT(mid);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(mid);
     ScopedObjectAccess soa(env);
     InvokeWithJValues(soa, nullptr, mid, args);
   }
@@ -1862,20 +1887,20 @@
   }
 
   static jsize GetStringLength(JNIEnv* env, jstring java_string) {
-    CHECK_NON_NULL_ARGUMENT(java_string);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(java_string);
     ScopedObjectAccess soa(env);
     return soa.Decode<mirror::String*>(java_string)->GetLength();
   }
 
   static jsize GetStringUTFLength(JNIEnv* env, jstring java_string) {
-    CHECK_NON_NULL_ARGUMENT(java_string);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(java_string);
     ScopedObjectAccess soa(env);
     return soa.Decode<mirror::String*>(java_string)->GetUtfLength();
   }
 
   static void GetStringRegion(JNIEnv* env, jstring java_string, jsize start, jsize length,
                               jchar* buf) {
-    CHECK_NON_NULL_ARGUMENT(java_string);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
     if (start < 0 || length < 0 || start + length > s->GetLength()) {
@@ -1889,7 +1914,7 @@
 
   static void GetStringUTFRegion(JNIEnv* env, jstring java_string, jsize start, jsize length,
                                  char* buf) {
-    CHECK_NON_NULL_ARGUMENT(java_string);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
     if (start < 0 || length < 0 || start + length > s->GetLength()) {
@@ -1921,7 +1946,7 @@
   }
 
   static void ReleaseStringChars(JNIEnv* env, jstring java_string, const jchar* chars) {
-    CHECK_NON_NULL_ARGUMENT(java_string);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
     delete[] chars;
     ScopedObjectAccess soa(env);
     UnpinPrimitiveArray(soa, soa.Decode<mirror::String*>(java_string)->GetCharArray());
@@ -1958,7 +1983,7 @@
   }
 
   static jsize GetArrayLength(JNIEnv* env, jarray java_array) {
-    CHECK_NON_NULL_ARGUMENT(java_array);
+    CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(java_array);
     ScopedObjectAccess soa(env);
     mirror::Object* obj = soa.Decode<mirror::Object*>(java_array);
     if (UNLIKELY(!obj->IsArrayInstance())) {
@@ -1978,7 +2003,7 @@
 
   static void SetObjectArrayElement(JNIEnv* env, jobjectArray java_array, jsize index,
                                     jobject java_value) {
-    CHECK_NON_NULL_ARGUMENT(java_array);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_array);
     ScopedObjectAccess soa(env);
     mirror::ObjectArray<mirror::Object>* array =
         soa.Decode<mirror::ObjectArray<mirror::Object>*>(java_array);
@@ -1987,38 +2012,31 @@
   }
 
   static jbooleanArray NewBooleanArray(JNIEnv* env, jsize length) {
-    ScopedObjectAccess soa(env);
-    return NewPrimitiveArray<jbooleanArray, mirror::BooleanArray>(soa, length);
+    return NewPrimitiveArray<jbooleanArray, mirror::BooleanArray>(env, length);
   }
 
   static jbyteArray NewByteArray(JNIEnv* env, jsize length) {
-    ScopedObjectAccess soa(env);
-    return NewPrimitiveArray<jbyteArray, mirror::ByteArray>(soa, length);
+    return NewPrimitiveArray<jbyteArray, mirror::ByteArray>(env, length);
   }
 
   static jcharArray NewCharArray(JNIEnv* env, jsize length) {
-    ScopedObjectAccess soa(env);
-    return NewPrimitiveArray<jcharArray, mirror::CharArray>(soa, length);
+    return NewPrimitiveArray<jcharArray, mirror::CharArray>(env, length);
   }
 
   static jdoubleArray NewDoubleArray(JNIEnv* env, jsize length) {
-    ScopedObjectAccess soa(env);
-    return NewPrimitiveArray<jdoubleArray, mirror::DoubleArray>(soa, length);
+    return NewPrimitiveArray<jdoubleArray, mirror::DoubleArray>(env, length);
   }
 
   static jfloatArray NewFloatArray(JNIEnv* env, jsize length) {
-    ScopedObjectAccess soa(env);
-    return NewPrimitiveArray<jfloatArray, mirror::FloatArray>(soa, length);
+    return NewPrimitiveArray<jfloatArray, mirror::FloatArray>(env, length);
   }
 
   static jintArray NewIntArray(JNIEnv* env, jsize length) {
-    ScopedObjectAccess soa(env);
-    return NewPrimitiveArray<jintArray, mirror::IntArray>(soa, length);
+    return NewPrimitiveArray<jintArray, mirror::IntArray>(env, length);
   }
 
   static jlongArray NewLongArray(JNIEnv* env, jsize length) {
-    ScopedObjectAccess soa(env);
-    return NewPrimitiveArray<jlongArray, mirror::LongArray>(soa, length);
+    return NewPrimitiveArray<jlongArray, mirror::LongArray>(env, length);
   }
 
   static jobjectArray NewObjectArray(JNIEnv* env, jsize length, jclass element_jclass,
@@ -2027,6 +2045,7 @@
       JniAbortF("NewObjectArray", "negative array length: %d", length);
       return nullptr;
     }
+    CHECK_NON_NULL_ARGUMENT(element_jclass);
 
     // Compute the array class corresponding to the given element class.
     ScopedObjectAccess soa(env);
@@ -2068,14 +2087,18 @@
   }
 
   static jshortArray NewShortArray(JNIEnv* env, jsize length) {
-    ScopedObjectAccess soa(env);
-    return NewPrimitiveArray<jshortArray, mirror::ShortArray>(soa, length);
+    return NewPrimitiveArray<jshortArray, mirror::ShortArray>(env, length);
   }
 
   static void* GetPrimitiveArrayCritical(JNIEnv* env, jarray java_array, jboolean* is_copy) {
     CHECK_NON_NULL_ARGUMENT(java_array);
     ScopedObjectAccess soa(env);
     mirror::Array* array = soa.Decode<mirror::Array*>(java_array);
+    if (UNLIKELY(!array->GetClass()->IsPrimitiveArray())) {
+      JniAbortF("GetPrimitiveArrayCritical", "expected primitive array, given %s",
+                PrettyDescriptor(array->GetClass()).c_str());
+      return nullptr;
+    }
     gc::Heap* heap = Runtime::Current()->GetHeap();
     if (heap->IsMovableObject(array)) {
       heap->IncrementDisableMovingGC(soa.Self());
@@ -2089,196 +2112,174 @@
     return array->GetRawData(array->GetClass()->GetComponentSize(), 0);
   }
 
-  static void ReleasePrimitiveArrayCritical(JNIEnv* env, jarray array, void* elements, jint mode) {
-    CHECK_NON_NULL_ARGUMENT(array);
-    ReleasePrimitiveArray(env, array, elements, mode);
+  static void ReleasePrimitiveArrayCritical(JNIEnv* env, jarray java_array, void* elements,
+                                            jint mode) {
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_array);
+    ScopedObjectAccess soa(env);
+    mirror::Array* array = soa.Decode<mirror::Array*>(java_array);
+    if (UNLIKELY(!array->GetClass()->IsPrimitiveArray())) {
+      JniAbortF("ReleasePrimitiveArrayCritical", "expected primitive array, given %s",
+                PrettyDescriptor(array->GetClass()).c_str());
+      return;
+    }
+    const size_t component_size = array->GetClass()->GetComponentSize();
+    ReleasePrimitiveArray(soa, array, component_size, elements, mode);
   }
 
   static jboolean* GetBooleanArrayElements(JNIEnv* env, jbooleanArray array, jboolean* is_copy) {
-    CHECK_NON_NULL_ARGUMENT(array);
-    ScopedObjectAccess soa(env);
-    return GetPrimitiveArray<jbooleanArray, jboolean*, mirror::BooleanArray>(soa, array, is_copy);
+    return GetPrimitiveArray<jbooleanArray, jboolean, mirror::BooleanArray>(env, array, is_copy);
   }
 
   static jbyte* GetByteArrayElements(JNIEnv* env, jbyteArray array, jboolean* is_copy) {
-    CHECK_NON_NULL_ARGUMENT(array);
-    ScopedObjectAccess soa(env);
-    return GetPrimitiveArray<jbyteArray, jbyte*, mirror::ByteArray>(soa, array, is_copy);
+    return GetPrimitiveArray<jbyteArray, jbyte, mirror::ByteArray>(env, array, is_copy);
   }
 
   static jchar* GetCharArrayElements(JNIEnv* env, jcharArray array, jboolean* is_copy) {
-    CHECK_NON_NULL_ARGUMENT(array);
-    ScopedObjectAccess soa(env);
-    return GetPrimitiveArray<jcharArray, jchar*, mirror::CharArray>(soa, array, is_copy);
+    return GetPrimitiveArray<jcharArray, jchar, mirror::CharArray>(env, array, is_copy);
   }
 
   static jdouble* GetDoubleArrayElements(JNIEnv* env, jdoubleArray array, jboolean* is_copy) {
-    CHECK_NON_NULL_ARGUMENT(array);
-    ScopedObjectAccess soa(env);
-    return GetPrimitiveArray<jdoubleArray, jdouble*, mirror::DoubleArray>(soa, array, is_copy);
+    return GetPrimitiveArray<jdoubleArray, jdouble, mirror::DoubleArray>(env, array, is_copy);
   }
 
   static jfloat* GetFloatArrayElements(JNIEnv* env, jfloatArray array, jboolean* is_copy) {
-    CHECK_NON_NULL_ARGUMENT(array);
-    ScopedObjectAccess soa(env);
-    return GetPrimitiveArray<jfloatArray, jfloat*, mirror::FloatArray>(soa, array, is_copy);
+    return GetPrimitiveArray<jfloatArray, jfloat, mirror::FloatArray>(env, array, is_copy);
   }
 
   static jint* GetIntArrayElements(JNIEnv* env, jintArray array, jboolean* is_copy) {
-    CHECK_NON_NULL_ARGUMENT(array);
-    ScopedObjectAccess soa(env);
-    return GetPrimitiveArray<jintArray, jint*, mirror::IntArray>(soa, array, is_copy);
+    return GetPrimitiveArray<jintArray, jint, mirror::IntArray>(env, array, is_copy);
   }
 
   static jlong* GetLongArrayElements(JNIEnv* env, jlongArray array, jboolean* is_copy) {
-    CHECK_NON_NULL_ARGUMENT(array);
-    ScopedObjectAccess soa(env);
-    return GetPrimitiveArray<jlongArray, jlong*, mirror::LongArray>(soa, array, is_copy);
+    return GetPrimitiveArray<jlongArray, jlong, mirror::LongArray>(env, array, is_copy);
   }
 
   static jshort* GetShortArrayElements(JNIEnv* env, jshortArray array, jboolean* is_copy) {
-    CHECK_NON_NULL_ARGUMENT(array);
-    ScopedObjectAccess soa(env);
-    return GetPrimitiveArray<jshortArray, jshort*, mirror::ShortArray>(soa, array, is_copy);
+    return GetPrimitiveArray<jshortArray, jshort, mirror::ShortArray>(env, array, is_copy);
   }
 
   static void ReleaseBooleanArrayElements(JNIEnv* env, jbooleanArray array, jboolean* elements,
                                           jint mode) {
-    ReleasePrimitiveArray(env, array, elements, mode);
+    ReleasePrimitiveArray<jbooleanArray, jboolean, mirror::BooleanArray>(env, array, elements,
+                                                                         mode);
   }
 
   static void ReleaseByteArrayElements(JNIEnv* env, jbyteArray array, jbyte* elements, jint mode) {
-    ReleasePrimitiveArray(env, array, elements, mode);
+    ReleasePrimitiveArray<jbyteArray, jbyte, mirror::ByteArray>(env, array, elements, mode);
   }
 
   static void ReleaseCharArrayElements(JNIEnv* env, jcharArray array, jchar* elements, jint mode) {
-    ReleasePrimitiveArray(env, array, elements, mode);
+    ReleasePrimitiveArray<jcharArray, jchar, mirror::CharArray>(env, array, elements, mode);
   }
 
   static void ReleaseDoubleArrayElements(JNIEnv* env, jdoubleArray array, jdouble* elements,
                                          jint mode) {
-    ReleasePrimitiveArray(env, array, elements, mode);
+    ReleasePrimitiveArray<jdoubleArray, jdouble, mirror::DoubleArray>(env, array, elements, mode);
   }
 
   static void ReleaseFloatArrayElements(JNIEnv* env, jfloatArray array, jfloat* elements,
                                         jint mode) {
-    ReleasePrimitiveArray(env, array, elements, mode);
+    ReleasePrimitiveArray<jfloatArray, jfloat, mirror::FloatArray>(env, array, elements, mode);
   }
 
   static void ReleaseIntArrayElements(JNIEnv* env, jintArray array, jint* elements, jint mode) {
-    ReleasePrimitiveArray(env, array, elements, mode);
+    ReleasePrimitiveArray<jintArray, jint, mirror::IntArray>(env, array, elements, mode);
   }
 
   static void ReleaseLongArrayElements(JNIEnv* env, jlongArray array, jlong* elements, jint mode) {
-    ReleasePrimitiveArray(env, array, elements, mode);
+    ReleasePrimitiveArray<jlongArray, jlong, mirror::LongArray>(env, array, elements, mode);
   }
 
   static void ReleaseShortArrayElements(JNIEnv* env, jshortArray array, jshort* elements,
                                         jint mode) {
-    ReleasePrimitiveArray(env, array, elements, mode);
+    ReleasePrimitiveArray<jshortArray, jshort, mirror::ShortArray>(env, array, elements, mode);
   }
 
   static void GetBooleanArrayRegion(JNIEnv* env, jbooleanArray array, jsize start, jsize length,
                                     jboolean* buf) {
-    ScopedObjectAccess soa(env);
-    GetPrimitiveArrayRegion<jbooleanArray, jboolean, mirror::BooleanArray>(soa, array, start,
+    GetPrimitiveArrayRegion<jbooleanArray, jboolean, mirror::BooleanArray>(env, array, start,
                                                                            length, buf);
   }
 
   static void GetByteArrayRegion(JNIEnv* env, jbyteArray array, jsize start, jsize length,
                                  jbyte* buf) {
-    ScopedObjectAccess soa(env);
-    GetPrimitiveArrayRegion<jbyteArray, jbyte, mirror::ByteArray>(soa, array, start, length, buf);
+    GetPrimitiveArrayRegion<jbyteArray, jbyte, mirror::ByteArray>(env, array, start, length, buf);
   }
 
   static void GetCharArrayRegion(JNIEnv* env, jcharArray array, jsize start, jsize length,
                                  jchar* buf) {
-    ScopedObjectAccess soa(env);
-    GetPrimitiveArrayRegion<jcharArray, jchar, mirror::CharArray>(soa, array, start, length, buf);
+    GetPrimitiveArrayRegion<jcharArray, jchar, mirror::CharArray>(env, array, start, length, buf);
   }
 
   static void GetDoubleArrayRegion(JNIEnv* env, jdoubleArray array, jsize start, jsize length,
                                    jdouble* buf) {
-    ScopedObjectAccess soa(env);
-    GetPrimitiveArrayRegion<jdoubleArray, jdouble, mirror::DoubleArray>(soa, array, start, length,
+    GetPrimitiveArrayRegion<jdoubleArray, jdouble, mirror::DoubleArray>(env, array, start, length,
                                                                         buf);
   }
 
   static void GetFloatArrayRegion(JNIEnv* env, jfloatArray array, jsize start, jsize length,
                                   jfloat* buf) {
-    ScopedObjectAccess soa(env);
-    GetPrimitiveArrayRegion<jfloatArray, jfloat, mirror::FloatArray>(soa, array, start, length,
+    GetPrimitiveArrayRegion<jfloatArray, jfloat, mirror::FloatArray>(env, array, start, length,
                                                                      buf);
   }
 
   static void GetIntArrayRegion(JNIEnv* env, jintArray array, jsize start, jsize length,
                                 jint* buf) {
-    ScopedObjectAccess soa(env);
-    GetPrimitiveArrayRegion<jintArray, jint, mirror::IntArray>(soa, array, start, length, buf);
+    GetPrimitiveArrayRegion<jintArray, jint, mirror::IntArray>(env, array, start, length, buf);
   }
 
   static void GetLongArrayRegion(JNIEnv* env, jlongArray array, jsize start, jsize length,
                                  jlong* buf) {
-    ScopedObjectAccess soa(env);
-    GetPrimitiveArrayRegion<jlongArray, jlong, mirror::LongArray>(soa, array, start, length, buf);
+    GetPrimitiveArrayRegion<jlongArray, jlong, mirror::LongArray>(env, array, start, length, buf);
   }
 
   static void GetShortArrayRegion(JNIEnv* env, jshortArray array, jsize start, jsize length,
                                   jshort* buf) {
-    ScopedObjectAccess soa(env);
-    GetPrimitiveArrayRegion<jshortArray, jshort, mirror::ShortArray>(soa, array, start, length,
+    GetPrimitiveArrayRegion<jshortArray, jshort, mirror::ShortArray>(env, array, start, length,
                                                                      buf);
   }
 
   static void SetBooleanArrayRegion(JNIEnv* env, jbooleanArray array, jsize start, jsize length,
                                     const jboolean* buf) {
-    ScopedObjectAccess soa(env);
-    SetPrimitiveArrayRegion<jbooleanArray, jboolean, mirror::BooleanArray>(soa, array, start,
+    SetPrimitiveArrayRegion<jbooleanArray, jboolean, mirror::BooleanArray>(env, array, start,
                                                                            length, buf);
   }
 
   static void SetByteArrayRegion(JNIEnv* env, jbyteArray array, jsize start, jsize length,
                                  const jbyte* buf) {
-    ScopedObjectAccess soa(env);
-    SetPrimitiveArrayRegion<jbyteArray, jbyte, mirror::ByteArray>(soa, array, start, length, buf);
+    SetPrimitiveArrayRegion<jbyteArray, jbyte, mirror::ByteArray>(env, array, start, length, buf);
   }
 
   static void SetCharArrayRegion(JNIEnv* env, jcharArray array, jsize start, jsize length,
                                  const jchar* buf) {
-    ScopedObjectAccess soa(env);
-    SetPrimitiveArrayRegion<jcharArray, jchar, mirror::CharArray>(soa, array, start, length, buf);
+    SetPrimitiveArrayRegion<jcharArray, jchar, mirror::CharArray>(env, array, start, length, buf);
   }
 
   static void SetDoubleArrayRegion(JNIEnv* env, jdoubleArray array, jsize start, jsize length,
                                    const jdouble* buf) {
-    ScopedObjectAccess soa(env);
-    SetPrimitiveArrayRegion<jdoubleArray, jdouble, mirror::DoubleArray>(soa, array, start, length,
+    SetPrimitiveArrayRegion<jdoubleArray, jdouble, mirror::DoubleArray>(env, array, start, length,
                                                                         buf);
   }
 
   static void SetFloatArrayRegion(JNIEnv* env, jfloatArray array, jsize start, jsize length,
                                   const jfloat* buf) {
-    ScopedObjectAccess soa(env);
-    SetPrimitiveArrayRegion<jfloatArray, jfloat, mirror::FloatArray>(soa, array, start, length,
+    SetPrimitiveArrayRegion<jfloatArray, jfloat, mirror::FloatArray>(env, array, start, length,
                                                                      buf);
   }
 
   static void SetIntArrayRegion(JNIEnv* env, jintArray array, jsize start, jsize length,
                                 const jint* buf) {
-    ScopedObjectAccess soa(env);
-    SetPrimitiveArrayRegion<jintArray, jint, mirror::IntArray>(soa, array, start, length, buf);
+    SetPrimitiveArrayRegion<jintArray, jint, mirror::IntArray>(env, array, start, length, buf);
   }
 
   static void SetLongArrayRegion(JNIEnv* env, jlongArray array, jsize start, jsize length,
                                  const jlong* buf) {
-    ScopedObjectAccess soa(env);
-    SetPrimitiveArrayRegion<jlongArray, jlong, mirror::LongArray>(soa, array, start, length, buf);
+    SetPrimitiveArrayRegion<jlongArray, jlong, mirror::LongArray>(env, array, start, length, buf);
   }
 
   static void SetShortArrayRegion(JNIEnv* env, jshortArray array, jsize start, jsize length,
                                   const jshort* buf) {
-    ScopedObjectAccess soa(env);
-    SetPrimitiveArrayRegion<jshortArray, jshort, mirror::ShortArray>(soa, array, start, length,
+    SetPrimitiveArrayRegion<jshortArray, jshort, mirror::ShortArray>(env, array, start, length,
                                                                      buf);
   }
 
@@ -2293,7 +2294,7 @@
       JniAbortF("RegisterNatives", "negative method count: %d", method_count);
       return JNI_ERR;  // Not reached.
     }
-    CHECK_NON_NULL_ARGUMENT_FN_NAME("RegisterNatives", java_class);
+    CHECK_NON_NULL_ARGUMENT_FN_NAME("RegisterNatives", java_class, JNI_ERR);
     ScopedObjectAccess soa(env);
     mirror::Class* c = soa.Decode<mirror::Class*>(java_class);
     if (UNLIKELY(method_count == 0)) {
@@ -2301,7 +2302,7 @@
           << PrettyDescriptor(c);
       return JNI_OK;
     }
-    CHECK_NON_NULL_ARGUMENT_FN_NAME("RegisterNatives", methods);
+    CHECK_NON_NULL_ARGUMENT_FN_NAME("RegisterNatives", methods, JNI_ERR);
     for (jint i = 0; i < method_count; ++i) {
       const char* name = methods[i].name;
       const char* sig = methods[i].signature;
@@ -2338,30 +2339,37 @@
   }
 
   static jint UnregisterNatives(JNIEnv* env, jclass java_class) {
-    CHECK_NON_NULL_ARGUMENT(java_class);
+    CHECK_NON_NULL_ARGUMENT_RETURN(java_class, JNI_ERR);
     ScopedObjectAccess soa(env);
     mirror::Class* c = soa.Decode<mirror::Class*>(java_class);
 
     VLOG(jni) << "[Unregistering JNI native methods for " << PrettyClass(c) << "]";
 
+    size_t unregistered_count = 0;
     for (size_t i = 0; i < c->NumDirectMethods(); ++i) {
       mirror::ArtMethod* m = c->GetDirectMethod(i);
       if (m->IsNative()) {
         m->UnregisterNative(soa.Self());
+        unregistered_count++;
       }
     }
     for (size_t i = 0; i < c->NumVirtualMethods(); ++i) {
       mirror::ArtMethod* m = c->GetVirtualMethod(i);
       if (m->IsNative()) {
         m->UnregisterNative(soa.Self());
+        unregistered_count++;
       }
     }
 
+    if (unregistered_count == 0) {
+      LOG(WARNING) << "JNI UnregisterNatives: attempt to unregister native methods of class '"
+          << PrettyDescriptor(c) << "' that contains no native methods";
+    }
     return JNI_OK;
   }
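
// --- Editor's note: illustrative usage sketch, not part of this change. ---
// A minimal caller-side view of the stricter behaviour above: null arguments
// and negative counts now return JNI_ERR, and unregistering a class that ends
// up with no native methods still returns JNI_OK but logs a warning. The
// helper function below is hypothetical; only standard JNI calls are used.
#include <jni.h>

static void NoopNotify(JNIEnv*, jobject) {}

static jint InstallAndRemoveNatives(JNIEnv* env) {
  jclass klass = env->FindClass("java/lang/Object");
  if (klass == nullptr) {
    return JNI_ERR;
  }
  const JNINativeMethod methods[] = {
      {"notify", "()V", reinterpret_cast<void*>(NoopNotify)},
  };
  if (env->RegisterNatives(klass, methods, 1) != JNI_OK) {
    return JNI_ERR;  // e.g. null class, null methods, or a negative count.
  }
  return env->UnregisterNatives(klass);  // JNI_OK; warns if nothing was native.
}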
 
   static jint MonitorEnter(JNIEnv* env, jobject java_object) NO_THREAD_SAFETY_ANALYSIS {
-    CHECK_NON_NULL_ARGUMENT(java_object);
+    CHECK_NON_NULL_ARGUMENT_RETURN(java_object, JNI_ERR);
     ScopedObjectAccess soa(env);
     mirror::Object* o = soa.Decode<mirror::Object*>(java_object);
     o = o->MonitorEnter(soa.Self());
@@ -2373,7 +2381,7 @@
   }
 
   static jint MonitorExit(JNIEnv* env, jobject java_object) NO_THREAD_SAFETY_ANALYSIS {
-    CHECK_NON_NULL_ARGUMENT(java_object);
+    CHECK_NON_NULL_ARGUMENT_RETURN(java_object, JNI_ERR);
     ScopedObjectAccess soa(env);
     mirror::Object* o = soa.Decode<mirror::Object*>(java_object);
     o->MonitorExit(soa.Self());
@@ -2385,7 +2393,7 @@
   }
 
   static jint GetJavaVM(JNIEnv* env, JavaVM** vm) {
-    CHECK_NON_NULL_ARGUMENT(vm);
+    CHECK_NON_NULL_ARGUMENT_RETURN(vm, JNI_ERR);
     Runtime* runtime = Runtime::Current();
     if (runtime != nullptr) {
       *vm = runtime->GetJavaVM();
@@ -2425,7 +2433,7 @@
   }
 
   static jobjectRefType GetObjectRefType(JNIEnv* env, jobject java_object) {
-    CHECK_NON_NULL_ARGUMENT(java_object);
+    CHECK_NON_NULL_ARGUMENT_RETURN(java_object, JNIInvalidRefType);
 
     // Do we definitely know what kind of reference this is?
     IndirectRef ref = reinterpret_cast<IndirectRef>(java_object);
@@ -2442,9 +2450,9 @@
       return JNIGlobalRefType;
     case kWeakGlobal:
       return JNIWeakGlobalRefType;
-    case kSirtOrInvalid:
+    case kHandleScopeOrInvalid:
       // Is it in a stack IRT?
-      if (static_cast<JNIEnvExt*>(env)->self->SirtContains(java_object)) {
+      if (static_cast<JNIEnvExt*>(env)->self->HandleScopeContains(java_object)) {
         return JNILocalRefType;
       }
       return JNIInvalidRefType;
@@ -2454,69 +2462,99 @@
   }
 
  private:
-  static jint EnsureLocalCapacity(JNIEnv* env, jint desired_capacity,
-                                  const char* caller) {
+  static jint EnsureLocalCapacity(ScopedObjectAccess& soa, jint desired_capacity,
+                                  const char* caller) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // TODO: we should try to expand the table if necessary.
     if (desired_capacity < 0 || desired_capacity > static_cast<jint>(kLocalsMax)) {
       LOG(ERROR) << "Invalid capacity given to " << caller << ": " << desired_capacity;
       return JNI_ERR;
     }
     // TODO: this isn't quite right, since "capacity" includes holes.
-    size_t capacity = static_cast<JNIEnvExt*>(env)->locals.Capacity();
+    const size_t capacity = soa.Env()->locals.Capacity();
     bool okay = (static_cast<jint>(kLocalsMax - capacity) >= desired_capacity);
     if (!okay) {
-      ScopedObjectAccess soa(env);
       soa.Self()->ThrowOutOfMemoryError(caller);
     }
     return okay ? JNI_OK : JNI_ERR;
   }
 
   template<typename JniT, typename ArtT>
-  static JniT NewPrimitiveArray(const ScopedObjectAccess& soa, jsize length)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  static JniT NewPrimitiveArray(JNIEnv* env, jsize length) {
     if (UNLIKELY(length < 0)) {
       JniAbortF("NewPrimitiveArray", "negative array length: %d", length);
       return nullptr;
     }
+    ScopedObjectAccess soa(env);
     ArtT* result = ArtT::Alloc(soa.Self(), length);
     return soa.AddLocalReference<JniT>(result);
   }
 
-  template <typename ArrayT, typename CArrayT, typename ArtArrayT>
-  static CArrayT GetPrimitiveArray(ScopedObjectAccess& soa, ArrayT java_array,
-                                   jboolean* is_copy)
+  template <typename JArrayT, typename ElementT, typename ArtArrayT>
+  static ArtArrayT* DecodeAndCheckArrayType(ScopedObjectAccess& soa, JArrayT java_array,
+                                           const char* fn_name, const char* operation)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     ArtArrayT* array = soa.Decode<ArtArrayT*>(java_array);
+    if (UNLIKELY(ArtArrayT::GetArrayClass() != array->GetClass())) {
+      JniAbortF(fn_name, "attempt to %s %s primitive array elements with an object of type %s",
+                operation, PrettyDescriptor(ArtArrayT::GetArrayClass()->GetComponentType()).c_str(),
+                PrettyDescriptor(array->GetClass()).c_str());
+      return nullptr;
+    }
+    DCHECK_EQ(sizeof(ElementT), array->GetClass()->GetComponentSize());
+    return array;
+  }
+
+  template <typename ArrayT, typename ElementT, typename ArtArrayT>
+  static ElementT* GetPrimitiveArray(JNIEnv* env, ArrayT java_array, jboolean* is_copy) {
+    CHECK_NON_NULL_ARGUMENT(java_array);
+    ScopedObjectAccess soa(env);
+    ArtArrayT* array = DecodeAndCheckArrayType<ArrayT, ElementT, ArtArrayT>(soa, java_array,
+                                                                            "GetArrayElements",
+                                                                            "get");
+    if (UNLIKELY(array == nullptr)) {
+      return nullptr;
+    }
     PinPrimitiveArray(soa, array);
     // Only make a copy if necessary.
     if (Runtime::Current()->GetHeap()->IsMovableObject(array)) {
       if (is_copy != nullptr) {
         *is_copy = JNI_TRUE;
       }
-      static const size_t component_size = array->GetClass()->GetComponentSize();
+      const size_t component_size = sizeof(ElementT);
       size_t size = array->GetLength() * component_size;
       void* data = new uint64_t[RoundUp(size, 8) / 8];
       memcpy(data, array->GetData(), size);
-      return reinterpret_cast<CArrayT>(data);
+      return reinterpret_cast<ElementT*>(data);
     } else {
       if (is_copy != nullptr) {
         *is_copy = JNI_FALSE;
       }
-      return reinterpret_cast<CArrayT>(array->GetData());
+      return reinterpret_cast<ElementT*>(array->GetData());
     }
   }
 
-  template <typename ArrayT, typename ElementT>
+  template <typename ArrayT, typename ElementT, typename ArtArrayT>
   static void ReleasePrimitiveArray(JNIEnv* env, ArrayT java_array, ElementT* elements, jint mode) {
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_array);
     ScopedObjectAccess soa(env);
-    mirror::Array* array = soa.Decode<mirror::Array*>(java_array);
-    size_t component_size = array->GetClass()->GetComponentSize();
+    ArtArrayT* array = DecodeAndCheckArrayType<ArrayT, ElementT, ArtArrayT>(soa, java_array,
+                                                                            "ReleaseArrayElements",
+                                                                            "release");
+    if (array == nullptr) {
+      return;
+    }
+    ReleasePrimitiveArray(soa, array, sizeof(ElementT), elements, mode);
+  }
+
+  static void ReleasePrimitiveArray(ScopedObjectAccess& soa, mirror::Array* array,
+                                    size_t component_size, void* elements, jint mode)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     void* array_data = array->GetRawData(component_size, 0);
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    bool is_copy = array_data != reinterpret_cast<void*>(elements);
+    bool is_copy = array_data != elements;
     size_t bytes = array->GetLength() * component_size;
-    VLOG(heap) << "Release primitive array " << env << " array_data " << array_data
-               << " elements " << reinterpret_cast<void*>(elements);
+    VLOG(heap) << "Release primitive array " << soa.Env() << " array_data " << array_data
+               << " elements " << elements;
     if (is_copy) {
       // Sanity check: If elements is not the same as the java array's data, it better not be a
       // heap address. TODO: This might be slow to check, may be worth keeping track of which
@@ -2542,33 +2580,43 @@
     }
   }
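
// --- Editor's note: illustrative usage sketch, not part of this change. ---
// The caller-visible contract of the Get/ReleasePrimitiveArray helpers above:
// for movable arrays the elements are a heap-allocated copy (is_copy set to
// JNI_TRUE), and the release mode decides whether that copy is written back
// and/or freed. The function below is hypothetical; only standard JNI is used.
#include <jni.h>

static jint SumIntArray(JNIEnv* env, jintArray array) {
  jboolean is_copy = JNI_FALSE;
  jint* elements = env->GetIntArrayElements(array, &is_copy);
  if (elements == nullptr) {
    return 0;  // Null array or wrong element type was rejected above.
  }
  jint sum = 0;
  const jsize length = env->GetArrayLength(array);
  for (jsize i = 0; i < length; ++i) {
    sum += elements[i];
  }
  // 0: write back (if copied) and free; JNI_ABORT: free without writing back;
  // JNI_COMMIT: write back but keep the buffer for further use.
  env->ReleaseIntArrayElements(array, elements, JNI_ABORT);
  return sum;
}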
 
-  template <typename JavaArrayT, typename JavaT, typename ArrayT>
-  static void GetPrimitiveArrayRegion(ScopedObjectAccess& soa, JavaArrayT java_array,
-                                      jsize start, jsize length, JavaT* buf)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK_NON_NULL_ARGUMENT(java_array);
-    ArrayT* array = soa.Decode<ArrayT*>(java_array);
-    if (start < 0 || length < 0 || start + length > array->GetLength()) {
-      ThrowAIOOBE(soa, array, start, length, "src");
-    } else {
-      CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
-      JavaT* data = array->GetData();
-      memcpy(buf, data + start, length * sizeof(JavaT));
+  template <typename JArrayT, typename ElementT, typename ArtArrayT>
+  static void GetPrimitiveArrayRegion(JNIEnv* env, JArrayT java_array,
+                                      jsize start, jsize length, ElementT* buf) {
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_array);
+    ScopedObjectAccess soa(env);
+    ArtArrayT* array =
+        DecodeAndCheckArrayType<JArrayT, ElementT, ArtArrayT>(soa, java_array,
+                                                              "GetPrimitiveArrayRegion",
+                                                              "get region of");
+    if (array != nullptr) {
+      if (start < 0 || length < 0 || start + length > array->GetLength()) {
+        ThrowAIOOBE(soa, array, start, length, "src");
+      } else {
+        CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
+        ElementT* data = array->GetData();
+        memcpy(buf, data + start, length * sizeof(ElementT));
+      }
     }
   }
 
-  template <typename JavaArrayT, typename JavaT, typename ArrayT>
-  static void SetPrimitiveArrayRegion(ScopedObjectAccess& soa, JavaArrayT java_array,
-                                      jsize start, jsize length, const JavaT* buf)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK_NON_NULL_ARGUMENT(java_array);
-    ArrayT* array = soa.Decode<ArrayT*>(java_array);
-    if (start < 0 || length < 0 || start + length > array->GetLength()) {
-      ThrowAIOOBE(soa, array, start, length, "dst");
-    } else {
-      CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
-      JavaT* data = array->GetData();
-      memcpy(data + start, buf, length * sizeof(JavaT));
+  template <typename JArrayT, typename ElementT, typename ArtArrayT>
+  static void SetPrimitiveArrayRegion(JNIEnv* env, JArrayT java_array,
+                                      jsize start, jsize length, const ElementT* buf) {
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_array);
+    ScopedObjectAccess soa(env);
+    ArtArrayT* array =
+        DecodeAndCheckArrayType<JArrayT, ElementT, ArtArrayT>(soa, java_array,
+                                                              "SetPrimitiveArrayRegion",
+                                                              "set region of");
+    if (array != nullptr) {
+      if (start < 0 || length < 0 || start + length > array->GetLength()) {
+        ThrowAIOOBE(soa, array, start, length, "dst");
+      } else {
+        CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
+        ElementT* data = array->GetData();
+        memcpy(data + start, buf, length * sizeof(ElementT));
+      }
     }
   }
 };
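
// --- Editor's note: illustrative usage sketch, not part of this change. ---
// Caller-side view of the region helpers above: an out-of-range start/length
// pends an ArrayIndexOutOfBoundsException, and (with this change) passing an
// array whose element type does not match triggers a JniAbortF runtime abort.
// The function below is hypothetical; only standard JNI calls are used.
#include <jni.h>

static void RoundTripFirstBytes(JNIEnv* env, jbyteArray array) {
  if (env->GetArrayLength(array) < 4) {
    return;
  }
  jbyte buf[4] = {0, 0, 0, 0};
  env->GetByteArrayRegion(array, 0, 4, buf);    // in bounds: copies out
  env->GetByteArrayRegion(array, -1, 4, buf);   // out of bounds: AIOOBE pending
  if (env->ExceptionCheck()) {
    env->ExceptionClear();
  }
  env->SetByteArrayRegion(array, 0, 4, buf);    // writes the bytes back
}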
@@ -2848,13 +2896,14 @@
   monitors.Dump(os);
 }
 
-void JNIEnvExt::PushFrame(int /*capacity*/) {
+void JNIEnvExt::PushFrame(int capacity) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  UNUSED(capacity);  // cpplint gets confused with (int) and thinks it's a cast.
   // TODO: take 'capacity' into account.
   stacked_local_ref_cookies.push_back(local_ref_cookie);
   local_ref_cookie = locals.GetSegmentState();
 }
 
-void JNIEnvExt::PopFrame() {
+void JNIEnvExt::PopFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   locals.SetSegmentState(local_ref_cookie);
   local_ref_cookie = stacked_local_ref_cookies.back();
   stacked_local_ref_cookies.pop_back();
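
// --- Editor's note: illustrative usage sketch, not part of this change. ---
// JNIEnvExt::PushFrame/PopFrame above back the JNI-level PushLocalFrame /
// PopLocalFrame pair, which scopes local references created inside a loop.
// The function below is hypothetical; only standard JNI calls are used.
#include <jni.h>

static void VisitElements(JNIEnv* env, jobjectArray array) {
  const jsize length = env->GetArrayLength(array);
  for (jsize i = 0; i < length; ++i) {
    if (env->PushLocalFrame(16) != JNI_OK) {  // capacity hint; see TODO above
      return;  // An OutOfMemoryError is already pending.
    }
    jobject element = env->GetObjectArrayElement(array, i);
    if (element != nullptr) {
      // ... do per-element work here ...
    }
    env->PopLocalFrame(nullptr);  // drops 'element' and any other locals made here
  }
}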
@@ -3089,7 +3138,7 @@
 }
 
 bool JavaVMExt::LoadNativeLibrary(const std::string& path,
-                                  const SirtRef<mirror::ClassLoader>& class_loader,
+                                  Handle<mirror::ClassLoader> class_loader,
                                   std::string* detail) {
   detail->clear();
 
@@ -3105,18 +3154,18 @@
     library = libraries->Get(path);
   }
   if (library != nullptr) {
-    if (library->GetClassLoader() != class_loader.get()) {
+    if (library->GetClassLoader() != class_loader.Get()) {
       // The library will be associated with class_loader. The JNI
       // spec says we can't load the same library into more than one
       // class loader.
       StringAppendF(detail, "Shared library \"%s\" already opened by "
           "ClassLoader %p; can't open in ClassLoader %p",
-          path.c_str(), library->GetClassLoader(), class_loader.get());
+          path.c_str(), library->GetClassLoader(), class_loader.Get());
       LOG(WARNING) << detail;
       return false;
     }
     VLOG(jni) << "[Shared library \"" << path << "\" already loaded in "
-              << "ClassLoader " << class_loader.get() << "]";
+              << "ClassLoader " << class_loader.Get() << "]";
     if (!library->CheckOnLoadResult()) {
       StringAppendF(detail, "JNI_OnLoad failed on a previous attempt "
           "to load \"%s\"", path.c_str());
@@ -3157,18 +3206,18 @@
     MutexLock mu(self, libraries_lock);
     library = libraries->Get(path);
     if (library == nullptr) {  // We won race to get libraries_lock
-      library = new SharedLibrary(path, handle, class_loader.get());
+      library = new SharedLibrary(path, handle, class_loader.Get());
       libraries->Put(path, library);
       created_library = true;
     }
   }
   if (!created_library) {
     LOG(INFO) << "WOW: we lost a race to add shared library: "
-        << "\"" << path << "\" ClassLoader=" << class_loader.get();
+        << "\"" << path << "\" ClassLoader=" << class_loader.Get();
     return library->CheckOnLoadResult();
   }
 
-  VLOG(jni) << "[Added shared library \"" << path << "\" for ClassLoader " << class_loader.get()
+  VLOG(jni) << "[Added shared library \"" << path << "\" for ClassLoader " << class_loader.Get()
       << "]";
 
   bool was_successful = false;
@@ -3183,8 +3232,9 @@
     // the comments in the JNI FindClass function.)
     typedef int (*JNI_OnLoadFn)(JavaVM*, void*);
     JNI_OnLoadFn jni_on_load = reinterpret_cast<JNI_OnLoadFn>(sym);
-    SirtRef<mirror::ClassLoader> old_class_loader(self, self->GetClassLoaderOverride());
-    self->SetClassLoaderOverride(class_loader.get());
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ClassLoader> old_class_loader(hs.NewHandle(self->GetClassLoaderOverride()));
+    self->SetClassLoaderOverride(class_loader.Get());
 
     int version = 0;
     {
@@ -3193,7 +3243,7 @@
       version = (*jni_on_load)(this, nullptr);
     }
 
-    self->SetClassLoaderOverride(old_class_loader.get());
+    self->SetClassLoaderOverride(old_class_loader.Get());
 
     if (version == JNI_ERR) {
       StringAppendF(detail, "JNI_ERR returned from JNI_OnLoad in \"%s\"", path.c_str());
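
// --- Editor's note: illustrative sketch, not part of this change. ---
// The SirtRef -> Handle migration means callers of LoadNativeLibrary now build
// a Handle via a StackHandleScope, mirroring what the hunks above do inside
// this file. The snippet is kept as a comment because it uses ART-internal
// types; the surrounding function and 'java_loader' are hypothetical.
//
//   Thread* self = Thread::Current();
//   StackHandleScope<1> hs(self);
//   Handle<mirror::ClassLoader> class_loader(
//       hs.NewHandle(soa.Decode<mirror::ClassLoader*>(java_loader)));
//   std::string detail;
//   if (!vm->LoadNativeLibrary(path, class_loader, &detail)) {
//     LOG(WARNING) << detail;
//   }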
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index ec911b2..7e76e11 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -46,7 +46,7 @@
 class Libraries;
 class ParsedOptions;
 class ScopedObjectAccess;
-template<class T> class SirtRef;
+template<class T> class Handle;
 class Thread;
 
 void JniAbortF(const char* jni_function_name, const char* fmt, ...)
@@ -67,7 +67,7 @@
    * Returns 'true' on success. On failure, sets 'detail' to a
    * human-readable description of the error.
    */
-  bool LoadNativeLibrary(const std::string& path, const SirtRef<mirror::ClassLoader>& class_loader,
+  bool LoadNativeLibrary(const std::string& path, Handle<mirror::ClassLoader> class_loader,
                          std::string* detail)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -116,7 +116,8 @@
 
   // JNI global references.
   ReaderWriterMutex globals_lock DEFAULT_MUTEX_ACQUIRED_AFTER;
-  IndirectReferenceTable globals GUARDED_BY(globals_lock);
+  // Not guarded by globals_lock since we sometimes use SynchronizedGet in Thread::DecodeJObject.
+  IndirectReferenceTable globals;
 
   Mutex libraries_lock DEFAULT_MUTEX_ACQUIRED_AFTER;
   Libraries* libraries GUARDED_BY(libraries_lock);
@@ -169,7 +170,7 @@
   uint32_t local_ref_cookie;
 
   // JNI local references.
-  IndirectReferenceTable locals;
+  IndirectReferenceTable locals GUARDED_BY(Locks::mutator_lock_);
 
   // Stack of cookies corresponding to PushLocalFrame/PopLocalFrame calls.
   // TODO: to avoid leaks (and bugs), we need to clear this vector on entry (or return)
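
// --- Editor's note: illustrative sketch, not part of this change. ---
// A minimal, self-contained example of the Clang thread-safety annotations the
// header relies on (GUARDED_BY and the *_locks_required attributes). All names
// below are hypothetical and only mirror the annotation pattern, not ART's own
// Mutex classes; compile with clang and -Wthread-safety to see the checks.
#include <vector>

#if defined(__clang__)
#define TS_CAPABILITY(x)  __attribute__((capability(x)))
#define TS_GUARDED_BY(x)  __attribute__((guarded_by(x)))
#define TS_REQUIRES(x)    __attribute__((exclusive_locks_required(x)))
#define TS_ACQUIRE(...)   __attribute__((exclusive_lock_function(__VA_ARGS__)))
#define TS_RELEASE(...)   __attribute__((unlock_function(__VA_ARGS__)))
#else
#define TS_CAPABILITY(x)
#define TS_GUARDED_BY(x)
#define TS_REQUIRES(x)
#define TS_ACQUIRE(...)
#define TS_RELEASE(...)
#endif

class TS_CAPABILITY("mutex") ToyMutex {
 public:
  void Lock() TS_ACQUIRE() {}
  void Unlock() TS_RELEASE() {}
};

class ToyRefTable {
 public:
  // Callers must hold lock_; clang flags call sites that do not.
  void Add(int ref) TS_REQUIRES(lock_) { refs_.push_back(ref); }
  ToyMutex lock_;

 private:
  std::vector<int> refs_ TS_GUARDED_BY(lock_);  // flagged if touched without lock_
};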
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 071b658..3429827 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -33,35 +33,54 @@
     // Turn on -verbose:jni for the JNI tests.
     // gLogVerbosity.jni = true;
 
-    vm_->AttachCurrentThread(&env_, NULL);
+    vm_->AttachCurrentThread(&env_, nullptr);
 
     ScopedLocalRef<jclass> aioobe(env_,
                                   env_->FindClass("java/lang/ArrayIndexOutOfBoundsException"));
-    CHECK(aioobe.get() != NULL);
+    CHECK(aioobe.get() != nullptr);
     aioobe_ = reinterpret_cast<jclass>(env_->NewGlobalRef(aioobe.get()));
 
     ScopedLocalRef<jclass> ase(env_, env_->FindClass("java/lang/ArrayStoreException"));
-    CHECK(ase.get() != NULL);
+    CHECK(ase.get() != nullptr);
     ase_ = reinterpret_cast<jclass>(env_->NewGlobalRef(ase.get()));
 
     ScopedLocalRef<jclass> sioobe(env_,
                                   env_->FindClass("java/lang/StringIndexOutOfBoundsException"));
-    CHECK(sioobe.get() != NULL);
+    CHECK(sioobe.get() != nullptr);
     sioobe_ = reinterpret_cast<jclass>(env_->NewGlobalRef(sioobe.get()));
   }
 
+  void ExpectException(jclass exception_class) {
+    EXPECT_TRUE(env_->ExceptionCheck());
+    jthrowable exception = env_->ExceptionOccurred();
+    EXPECT_NE(nullptr, exception);
+    env_->ExceptionClear();
+    EXPECT_TRUE(env_->IsInstanceOf(exception, exception_class));
+  }
+
+  void ExpectClassFound(const char* name) {
+    EXPECT_NE(env_->FindClass(name), nullptr) << name;
+    EXPECT_FALSE(env_->ExceptionCheck()) << name;
+  }
+
+  void ExpectClassNotFound(const char* name) {
+    EXPECT_EQ(env_->FindClass(name), nullptr) << name;
+    EXPECT_TRUE(env_->ExceptionCheck()) << name;
+    env_->ExceptionClear();
+  }
+
   void CleanUpJniEnv() {
-    if (aioobe_ != NULL) {
+    if (aioobe_ != nullptr) {
       env_->DeleteGlobalRef(aioobe_);
-      aioobe_ = NULL;
+      aioobe_ = nullptr;
     }
-    if (ase_ != NULL) {
+    if (ase_ != nullptr) {
       env_->DeleteGlobalRef(ase_);
-      ase_ = NULL;
+      ase_ = nullptr;
     }
-    if (sioobe_ != NULL) {
+    if (sioobe_ != nullptr) {
       env_->DeleteGlobalRef(sioobe_);
-      sioobe_ = NULL;
+      sioobe_ = nullptr;
     }
   }
 
@@ -86,9 +105,9 @@
 
 TEST_F(JniInternalTest, AllocObject) {
   jclass c = env_->FindClass("java/lang/String");
-  ASSERT_TRUE(c != NULL);
+  ASSERT_NE(c, nullptr);
   jobject o = env_->AllocObject(c);
-  ASSERT_TRUE(o != NULL);
+  ASSERT_NE(o, nullptr);
 
   // We have an instance of the class we asked for...
   ASSERT_TRUE(env_->IsInstanceOf(o, c));
@@ -96,139 +115,152 @@
   // we didn't call a constructor.
   ASSERT_EQ(0, env_->GetIntField(o, env_->GetFieldID(c, "count", "I")));
   ASSERT_EQ(0, env_->GetIntField(o, env_->GetFieldID(c, "offset", "I")));
-  ASSERT_TRUE(env_->GetObjectField(o, env_->GetFieldID(c, "value", "[C")) == NULL);
+  ASSERT_TRUE(env_->GetObjectField(o, env_->GetFieldID(c, "value", "[C")) == nullptr);
 }
 
 TEST_F(JniInternalTest, GetVersion) {
   ASSERT_EQ(JNI_VERSION_1_6, env_->GetVersion());
 }
 
-#define EXPECT_CLASS_FOUND(NAME) \
-  EXPECT_TRUE(env_->FindClass(NAME) != NULL); \
-  EXPECT_FALSE(env_->ExceptionCheck())
-
-#define EXPECT_CLASS_NOT_FOUND(NAME) \
-  EXPECT_TRUE(env_->FindClass(NAME) == NULL); \
-  EXPECT_TRUE(env_->ExceptionCheck()); \
-  env_->ExceptionClear()
-
 TEST_F(JniInternalTest, FindClass) {
   // Reference types...
-  EXPECT_CLASS_FOUND("java/lang/String");
+  ExpectClassFound("java/lang/String");
   // ...for arrays too, where you must include "L;".
-  EXPECT_CLASS_FOUND("[Ljava/lang/String;");
+  ExpectClassFound("[Ljava/lang/String;");
   // Primitive arrays are okay too, if the primitive type is valid.
-  EXPECT_CLASS_FOUND("[C");
+  ExpectClassFound("[C");
 
   {
-    // We support . as well as / for compatibility, if -Xcheck:jni is off.
     CheckJniAbortCatcher check_jni_abort_catcher;
-    EXPECT_CLASS_FOUND("java.lang.String");
+    env_->FindClass(nullptr);
+    check_jni_abort_catcher.Check("name == null");
+
+    // We support . as well as / for compatibility, if -Xcheck:jni is off.
+    ExpectClassFound("java.lang.String");
     check_jni_abort_catcher.Check("illegal class name 'java.lang.String'");
-    EXPECT_CLASS_NOT_FOUND("Ljava.lang.String;");
+    ExpectClassNotFound("Ljava.lang.String;");
     check_jni_abort_catcher.Check("illegal class name 'Ljava.lang.String;'");
-    EXPECT_CLASS_FOUND("[Ljava.lang.String;");
+    ExpectClassFound("[Ljava.lang.String;");
     check_jni_abort_catcher.Check("illegal class name '[Ljava.lang.String;'");
-    EXPECT_CLASS_NOT_FOUND("[java.lang.String");
+    ExpectClassNotFound("[java.lang.String");
     check_jni_abort_catcher.Check("illegal class name '[java.lang.String'");
 
     // You can't include the "L;" in a JNI class descriptor.
-    EXPECT_CLASS_NOT_FOUND("Ljava/lang/String;");
+    ExpectClassNotFound("Ljava/lang/String;");
     check_jni_abort_catcher.Check("illegal class name 'Ljava/lang/String;'");
 
     // But you must include it for an array of any reference type.
-    EXPECT_CLASS_NOT_FOUND("[java/lang/String");
+    ExpectClassNotFound("[java/lang/String");
     check_jni_abort_catcher.Check("illegal class name '[java/lang/String'");
 
-    EXPECT_CLASS_NOT_FOUND("[K");
+    ExpectClassNotFound("[K");
     check_jni_abort_catcher.Check("illegal class name '[K'");
+
+    // Void arrays aren't allowed.
+    ExpectClassNotFound("[V");
+    check_jni_abort_catcher.Check("illegal class name '[V'");
   }
 
   // But primitive types aren't allowed...
-  EXPECT_CLASS_NOT_FOUND("C");
-  EXPECT_CLASS_NOT_FOUND("K");
+  ExpectClassNotFound("C");
+  ExpectClassNotFound("V");
+  ExpectClassNotFound("K");
 }
 
-#define EXPECT_EXCEPTION(exception_class) \
-  do { \
-    EXPECT_TRUE(env_->ExceptionCheck()); \
-    jthrowable exception = env_->ExceptionOccurred(); \
-    EXPECT_NE(static_cast<jthrowable>(NULL), exception); \
-    env_->ExceptionClear(); \
-    EXPECT_TRUE(env_->IsInstanceOf(exception, exception_class)); \
-  } while (false)
-
 TEST_F(JniInternalTest, GetFieldID) {
   jclass jlnsfe = env_->FindClass("java/lang/NoSuchFieldError");
-  ASSERT_TRUE(jlnsfe != NULL);
+  ASSERT_NE(jlnsfe, nullptr);
   jclass c = env_->FindClass("java/lang/String");
-  ASSERT_TRUE(c != NULL);
+  ASSERT_NE(c, nullptr);
 
   // Wrong type.
   jfieldID fid = env_->GetFieldID(c, "count", "J");
-  EXPECT_EQ(static_cast<jfieldID>(NULL), fid);
-  EXPECT_EXCEPTION(jlnsfe);
+  EXPECT_EQ(nullptr, fid);
+  ExpectException(jlnsfe);
 
   // Wrong type where type doesn't exist.
   fid = env_->GetFieldID(c, "count", "Lrod/jane/freddy;");
-  EXPECT_EQ(static_cast<jfieldID>(NULL), fid);
-  EXPECT_EXCEPTION(jlnsfe);
+  EXPECT_EQ(nullptr, fid);
+  ExpectException(jlnsfe);
 
   // Wrong name.
   fid = env_->GetFieldID(c, "Count", "I");
-  EXPECT_EQ(static_cast<jfieldID>(NULL), fid);
-  EXPECT_EXCEPTION(jlnsfe);
+  EXPECT_EQ(nullptr, fid);
+  ExpectException(jlnsfe);
 
   // Good declared field lookup.
   fid = env_->GetFieldID(c, "count", "I");
-  EXPECT_NE(static_cast<jfieldID>(NULL), fid);
-  EXPECT_TRUE(fid != NULL);
+  EXPECT_NE(nullptr, fid);
   EXPECT_FALSE(env_->ExceptionCheck());
 
   // Good superclass field lookup.
   c = env_->FindClass("java/lang/StringBuilder");
   fid = env_->GetFieldID(c, "count", "I");
-  EXPECT_NE(static_cast<jfieldID>(NULL), fid);
-  EXPECT_TRUE(fid != NULL);
+  EXPECT_NE(nullptr, fid);
   EXPECT_FALSE(env_->ExceptionCheck());
 
   // Not instance.
   fid = env_->GetFieldID(c, "CASE_INSENSITIVE_ORDER", "Ljava/util/Comparator;");
-  EXPECT_EQ(static_cast<jfieldID>(NULL), fid);
-  EXPECT_EXCEPTION(jlnsfe);
+  EXPECT_EQ(nullptr, fid);
+  ExpectException(jlnsfe);
+
+  // Bad arguments.
+  CheckJniAbortCatcher check_jni_abort_catcher;
+  fid = env_->GetFieldID(nullptr, "count", "I");
+  EXPECT_EQ(nullptr, fid);
+  check_jni_abort_catcher.Check("java_class == null");
+  fid = env_->GetFieldID(c, nullptr, "I");
+  EXPECT_EQ(nullptr, fid);
+  check_jni_abort_catcher.Check("name == null");
+  fid = env_->GetFieldID(c, "count", nullptr);
+  EXPECT_EQ(nullptr, fid);
+  check_jni_abort_catcher.Check("sig == null");
 }
 
 TEST_F(JniInternalTest, GetStaticFieldID) {
   jclass jlnsfe = env_->FindClass("java/lang/NoSuchFieldError");
-  ASSERT_TRUE(jlnsfe != NULL);
+  ASSERT_NE(jlnsfe, nullptr);
   jclass c = env_->FindClass("java/lang/String");
-  ASSERT_TRUE(c != NULL);
+  ASSERT_NE(c, nullptr);
 
   // Wrong type.
   jfieldID fid = env_->GetStaticFieldID(c, "CASE_INSENSITIVE_ORDER", "J");
-  EXPECT_EQ(static_cast<jfieldID>(NULL), fid);
-  EXPECT_EXCEPTION(jlnsfe);
+  EXPECT_EQ(nullptr, fid);
+  ExpectException(jlnsfe);
 
   // Wrong type where type doesn't exist.
   fid = env_->GetStaticFieldID(c, "CASE_INSENSITIVE_ORDER", "Lrod/jane/freddy;");
-  EXPECT_EQ(static_cast<jfieldID>(NULL), fid);
-  EXPECT_EXCEPTION(jlnsfe);
+  EXPECT_EQ(nullptr, fid);
+  ExpectException(jlnsfe);
 
   // Wrong name.
   fid = env_->GetStaticFieldID(c, "cASE_INSENSITIVE_ORDER", "Ljava/util/Comparator;");
-  EXPECT_EQ(static_cast<jfieldID>(NULL), fid);
-  EXPECT_EXCEPTION(jlnsfe);
+  EXPECT_EQ(nullptr, fid);
+  ExpectException(jlnsfe);
 
   // Good declared field lookup.
   fid = env_->GetStaticFieldID(c, "CASE_INSENSITIVE_ORDER", "Ljava/util/Comparator;");
-  EXPECT_NE(static_cast<jfieldID>(NULL), fid);
-  EXPECT_TRUE(fid != NULL);
+  EXPECT_NE(nullptr, fid);
   EXPECT_FALSE(env_->ExceptionCheck());
 
   // Not static.
   fid = env_->GetStaticFieldID(c, "count", "I");
-  EXPECT_EQ(static_cast<jfieldID>(NULL), fid);
-  EXPECT_EXCEPTION(jlnsfe);
+  EXPECT_EQ(nullptr, fid);
+  ExpectException(jlnsfe);
+
+  // Bad arguments.
+  CheckJniAbortCatcher check_jni_abort_catcher;
+  fid = env_->GetStaticFieldID(nullptr, "CASE_INSENSITIVE_ORDER", "Ljava/util/Comparator;");
+  EXPECT_EQ(nullptr, fid);
+  check_jni_abort_catcher.Check("java_class == null");
+  fid = env_->GetStaticFieldID(c, nullptr, "Ljava/util/Comparator;");
+  EXPECT_EQ(nullptr, fid);
+  check_jni_abort_catcher.Check("name == null");
+  fid = env_->GetStaticFieldID(c, "CASE_INSENSITIVE_ORDER", nullptr);
+  EXPECT_EQ(nullptr, fid);
+  check_jni_abort_catcher.Check("sig == null");
 }
 
 TEST_F(JniInternalTest, GetMethodID) {
@@ -242,24 +274,36 @@
   // Check that java.lang.Object.foo() doesn't exist and NoSuchMethodError is
   // a pending exception
   jmethodID method = env_->GetMethodID(jlobject, "foo", "()V");
-  EXPECT_EQ(static_cast<jmethodID>(NULL), method);
-  EXPECT_EXCEPTION(jlnsme);
+  EXPECT_EQ(nullptr, method);
+  ExpectException(jlnsme);
 
   // Check that java.lang.Object.equals() does exist
   method = env_->GetMethodID(jlobject, "equals", "(Ljava/lang/Object;)Z");
-  EXPECT_NE(static_cast<jmethodID>(NULL), method);
+  EXPECT_NE(nullptr, method);
   EXPECT_FALSE(env_->ExceptionCheck());
 
   // Check that GetMethodID for java.lang.String.valueOf(int) fails as the
   // method is static
   method = env_->GetMethodID(jlstring, "valueOf", "(I)Ljava/lang/String;");
-  EXPECT_EQ(static_cast<jmethodID>(NULL), method);
-  EXPECT_EXCEPTION(jlnsme);
+  EXPECT_EQ(nullptr, method);
+  ExpectException(jlnsme);
 
   // Check that GetMethodID for java.lang.NoSuchMethodError.<init>(String) finds the constructor
   method = env_->GetMethodID(jlnsme, "<init>", "(Ljava/lang/String;)V");
-  EXPECT_NE(static_cast<jmethodID>(NULL), method);
+  EXPECT_NE(nullptr, method);
   EXPECT_FALSE(env_->ExceptionCheck());
+
+  // Bad arguments.
+  CheckJniAbortCatcher check_jni_abort_catcher;
+  method = env_->GetMethodID(nullptr, "<init>", "(Ljava/lang/String;)V");
+  EXPECT_EQ(nullptr, method);
+  check_jni_abort_catcher.Check("java_class == null");
+  method = env_->GetMethodID(jlnsme, nullptr, "(Ljava/lang/String;)V");
+  EXPECT_EQ(nullptr, method);
+  check_jni_abort_catcher.Check("name == null");
+  method = env_->GetMethodID(jlnsme, "<init>", nullptr);
+  EXPECT_EQ(nullptr, method);
+  check_jni_abort_catcher.Check("sig == null");
 }
 
 TEST_F(JniInternalTest, GetStaticMethodID) {
@@ -272,97 +316,155 @@
   // Check that java.lang.Object.foo() doesn't exist and NoSuchMethodError is
   // a pending exception
   jmethodID method = env_->GetStaticMethodID(jlobject, "foo", "()V");
-  EXPECT_EQ(static_cast<jmethodID>(NULL), method);
-  EXPECT_EXCEPTION(jlnsme);
+  EXPECT_EQ(nullptr, method);
+  ExpectException(jlnsme);
 
   // Check that GetStaticMethodID for java.lang.Object.equals(Object) fails as
   // the method is not static
   method = env_->GetStaticMethodID(jlobject, "equals", "(Ljava/lang/Object;)Z");
-  EXPECT_EQ(static_cast<jmethodID>(NULL), method);
-  EXPECT_EXCEPTION(jlnsme);
+  EXPECT_EQ(nullptr, method);
+  ExpectException(jlnsme);
 
   // Check that java.lang.String.valueOf(int) does exist
   jclass jlstring = env_->FindClass("java/lang/String");
-  method = env_->GetStaticMethodID(jlstring, "valueOf",
-                                   "(I)Ljava/lang/String;");
-  EXPECT_NE(static_cast<jmethodID>(NULL), method);
+  method = env_->GetStaticMethodID(jlstring, "valueOf", "(I)Ljava/lang/String;");
+  EXPECT_NE(nullptr, method);
   EXPECT_FALSE(env_->ExceptionCheck());
+
+  // Bad arguments.
+  CheckJniAbortCatcher check_jni_abort_catcher;
+  method = env_->GetStaticMethodID(nullptr, "valueOf", "(I)Ljava/lang/String;");
+  EXPECT_EQ(nullptr, method);
+  check_jni_abort_catcher.Check("java_class == null");
+  method = env_->GetStaticMethodID(jlstring, nullptr, "(I)Ljava/lang/String;");
+  EXPECT_EQ(nullptr, method);
+  check_jni_abort_catcher.Check("name == null");
+  method = env_->GetStaticMethodID(jlstring, "valueOf", nullptr);
+  EXPECT_EQ(nullptr, method);
+  check_jni_abort_catcher.Check("sig == null");
 }
 
 TEST_F(JniInternalTest, FromReflectedField_ToReflectedField) {
   jclass jlrField = env_->FindClass("java/lang/reflect/Field");
   jclass c = env_->FindClass("java/lang/String");
-  ASSERT_TRUE(c != NULL);
+  ASSERT_NE(c, nullptr);
   jfieldID fid = env_->GetFieldID(c, "count", "I");
-  ASSERT_TRUE(fid != NULL);
+  ASSERT_NE(fid, nullptr);
   // Turn the fid into a java.lang.reflect.Field...
   jobject field = env_->ToReflectedField(c, fid, JNI_FALSE);
-  ASSERT_TRUE(c != NULL);
+  ASSERT_NE(c, nullptr);
   ASSERT_TRUE(env_->IsInstanceOf(field, jlrField));
   // ...and back again.
   jfieldID fid2 = env_->FromReflectedField(field);
-  ASSERT_TRUE(fid2 != NULL);
+  ASSERT_NE(fid2, nullptr);
   // Make sure we can actually use it.
   jstring s = env_->NewStringUTF("poop");
   ASSERT_EQ(4, env_->GetIntField(s, fid2));
+
+  // Bad arguments.
+  CheckJniAbortCatcher check_jni_abort_catcher;
+  field = env_->ToReflectedField(c, nullptr, JNI_FALSE);
+  EXPECT_EQ(field, nullptr);
+  check_jni_abort_catcher.Check("fid == null");
+  fid2 = env_->FromReflectedField(nullptr);
+  ASSERT_EQ(fid2, nullptr);
+  check_jni_abort_catcher.Check("jlr_field == null");
 }
 
 TEST_F(JniInternalTest, FromReflectedMethod_ToReflectedMethod) {
   jclass jlrMethod = env_->FindClass("java/lang/reflect/Method");
   jclass c = env_->FindClass("java/lang/String");
-  ASSERT_TRUE(c != NULL);
+  ASSERT_NE(c, nullptr);
   jmethodID mid = env_->GetMethodID(c, "length", "()I");
-  ASSERT_TRUE(mid != NULL);
+  ASSERT_NE(mid, nullptr);
   // Turn the mid into a java.lang.reflect.Method...
   jobject method = env_->ToReflectedMethod(c, mid, JNI_FALSE);
-  ASSERT_TRUE(c != NULL);
+  ASSERT_NE(c, nullptr);
   ASSERT_TRUE(env_->IsInstanceOf(method, jlrMethod));
   // ...and back again.
   jmethodID mid2 = env_->FromReflectedMethod(method);
-  ASSERT_TRUE(mid2 != NULL);
+  ASSERT_NE(mid2, nullptr);
   // Make sure we can actually use it.
   jstring s = env_->NewStringUTF("poop");
-  // TODO: this should return 4, but the runtime skips the method
-  // invoke because the runtime isn't started. In the future it would
-  // be nice to use interpretter for things like this. This still does
-  // validate that we have a sane jmethodID value.
-  ASSERT_EQ(0, env_->CallIntMethod(s, mid2));
+  ASSERT_EQ(4, env_->CallIntMethod(s, mid2));
+
+  // Bad arguments.
+  CheckJniAbortCatcher check_jni_abort_catcher;
+  method = env_->ToReflectedMethod(c, nullptr, JNI_FALSE);
+  EXPECT_EQ(method, nullptr);
+  check_jni_abort_catcher.Check("mid == null");
+  mid2 = env_->FromReflectedMethod(method);
+  ASSERT_EQ(mid2, nullptr);
+  check_jni_abort_catcher.Check("jlr_method == null");
 }
 
-void BogusMethod() {
-  // You can't pass NULL function pointers to RegisterNatives.
+static void BogusMethod() {
+  // You can't pass nullptr function pointers to RegisterNatives.
 }
 
-TEST_F(JniInternalTest, RegisterNatives) {
+TEST_F(JniInternalTest, RegisterAndUnregisterNatives) {
   jclass jlobject = env_->FindClass("java/lang/Object");
   jclass jlnsme = env_->FindClass("java/lang/NoSuchMethodError");
 
-  // Sanity check that no exceptions are pending
+  // Sanity check that no exceptions are pending.
   ASSERT_FALSE(env_->ExceptionCheck());
 
-  // Check that registering to a non-existent java.lang.Object.foo() causes a
-  // NoSuchMethodError
+  // Check that registering to a non-existent java.lang.Object.foo() causes a NoSuchMethodError.
   {
-    JNINativeMethod methods[] = { { "foo", "()V", NULL } };
-    env_->RegisterNatives(jlobject, methods, 1);
+    JNINativeMethod methods[] = { { "foo", "()V", nullptr } };
+    EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_ERR);
   }
-  EXPECT_EXCEPTION(jlnsme);
+  ExpectException(jlnsme);
 
-  // Check that registering non-native methods causes a NoSuchMethodError
+  // Check that registering non-native methods causes a NoSuchMethodError.
   {
-    JNINativeMethod methods[] = { { "equals", "(Ljava/lang/Object;)Z", NULL } };
-    env_->RegisterNatives(jlobject, methods, 1);
+    JNINativeMethod methods[] = { { "equals", "(Ljava/lang/Object;)Z", nullptr } };
+    EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_ERR);
   }
-  EXPECT_EXCEPTION(jlnsme);
+  ExpectException(jlnsme);
 
-  // Check that registering native methods is successful
+  // Check that registering native methods is successful.
   {
     JNINativeMethod methods[] = { { "notify", "()V", reinterpret_cast<void*>(BogusMethod) } };
-    env_->RegisterNatives(jlobject, methods, 1);
+    EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_OK);
+  }
+  EXPECT_FALSE(env_->ExceptionCheck());
+  EXPECT_EQ(env_->UnregisterNatives(jlobject), JNI_OK);
+
+  // Check that registering no methods isn't a failure.
+  {
+    JNINativeMethod methods[] = { };
+    EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 0), JNI_OK);
+  }
+  EXPECT_FALSE(env_->ExceptionCheck());
+  EXPECT_EQ(env_->UnregisterNatives(jlobject), JNI_OK);
+
+  // Check that registering a negative number of methods is a failure.
+  CheckJniAbortCatcher check_jni_abort_catcher;
+  for (int i = -10; i < 0; ++i) {
+    JNINativeMethod methods[] = { };
+    EXPECT_EQ(env_->RegisterNatives(jlobject, methods, i), JNI_ERR);
+    check_jni_abort_catcher.Check("negative method count: ");
   }
   EXPECT_FALSE(env_->ExceptionCheck());
 
-  env_->UnregisterNatives(jlobject);
+  // Passing a null class is a failure.
+  {
+    JNINativeMethod methods[] = { };
+    EXPECT_EQ(env_->RegisterNatives(nullptr, methods, 0), JNI_ERR);
+    check_jni_abort_catcher.Check("java_class == null");
+  }
+
+  // Passing methods as null is a failure.
+  EXPECT_EQ(env_->RegisterNatives(jlobject, nullptr, 1), JNI_ERR);
+  check_jni_abort_catcher.Check("methods == null");
+
+  // Unregistering null is a failure.
+  EXPECT_EQ(env_->UnregisterNatives(nullptr), JNI_ERR);
+  check_jni_abort_catcher.Check("java_class == null");
+
+  // Unregistering a class with no natives is a warning.
+  EXPECT_EQ(env_->UnregisterNatives(jlnsme), JNI_OK);
 }
 
 #define EXPECT_PRIMITIVE_ARRAY(new_fn, \
@@ -372,56 +474,73 @@
                                release_elements_fn, \
                                scalar_type, \
                                expected_class_descriptor) \
+  jsize size = 4; \
+  \
   { \
     CheckJniAbortCatcher jni_abort_catcher; \
     /* Allocate an negative sized array and check it has the right failure type. */ \
-    env_->new_fn(-1); \
+    EXPECT_EQ(env_->new_fn(-1), nullptr); \
     jni_abort_catcher.Check("negative array length: -1"); \
-    env_->new_fn(std::numeric_limits<jint>::min()); \
+    EXPECT_EQ(env_->new_fn(std::numeric_limits<jint>::min()), nullptr); \
     jni_abort_catcher.Check("negative array length: -2147483648"); \
+    /* Pass the array as null. */ \
+    EXPECT_EQ(0, env_->GetArrayLength(nullptr)); \
+    jni_abort_catcher.Check("java_array == null"); \
+    env_->get_region_fn(nullptr, 0, 0, nullptr); \
+    jni_abort_catcher.Check("java_array == null"); \
+    env_->set_region_fn(nullptr, 0, 0, nullptr); \
+    jni_abort_catcher.Check("java_array == null"); \
+    env_->get_elements_fn(nullptr, nullptr); \
+    jni_abort_catcher.Check("java_array == null"); \
+    env_->release_elements_fn(nullptr, nullptr, 0); \
+    jni_abort_catcher.Check("java_array == null"); \
+    /* Pass a null buffer to the region calls. */ \
+    scalar_type ## Array a = env_->new_fn(size); \
+    env_->get_region_fn(a, 0, size, nullptr); \
+    jni_abort_catcher.Check("buf == null"); \
+    env_->set_region_fn(a, 0, size, nullptr); \
+    jni_abort_catcher.Check("buf == null"); \
   } \
-  jsize size = 4; \
-  \
   /* Allocate an array and check it has the right type and length. */ \
   scalar_type ## Array a = env_->new_fn(size); \
-  EXPECT_TRUE(a != NULL); \
+  EXPECT_NE(a, nullptr); \
   EXPECT_TRUE(env_->IsInstanceOf(a, env_->FindClass(expected_class_descriptor))); \
   EXPECT_EQ(size, env_->GetArrayLength(a)); \
   \
   /* GetPrimitiveArrayRegion/SetPrimitiveArrayRegion */ \
   /* AIOOBE for negative start offset. */ \
-  env_->get_region_fn(a, -1, 1, NULL); \
-  EXPECT_EXCEPTION(aioobe_); \
-  env_->set_region_fn(a, -1, 1, NULL); \
-  EXPECT_EXCEPTION(aioobe_); \
+  env_->get_region_fn(a, -1, 1, nullptr); \
+  ExpectException(aioobe_); \
+  env_->set_region_fn(a, -1, 1, nullptr); \
+  ExpectException(aioobe_); \
   \
   /* AIOOBE for negative length. */ \
-  env_->get_region_fn(a, 0, -1, NULL); \
-  EXPECT_EXCEPTION(aioobe_); \
-  env_->set_region_fn(a, 0, -1, NULL); \
-  EXPECT_EXCEPTION(aioobe_); \
+  env_->get_region_fn(a, 0, -1, nullptr); \
+  ExpectException(aioobe_); \
+  env_->set_region_fn(a, 0, -1, nullptr); \
+  ExpectException(aioobe_); \
   \
   /* AIOOBE for buffer overrun. */ \
-  env_->get_region_fn(a, size - 1, size, NULL); \
-  EXPECT_EXCEPTION(aioobe_); \
-  env_->set_region_fn(a, size - 1, size, NULL); \
-  EXPECT_EXCEPTION(aioobe_); \
+  env_->get_region_fn(a, size - 1, size, nullptr); \
+  ExpectException(aioobe_); \
+  env_->set_region_fn(a, size - 1, size, nullptr); \
+  ExpectException(aioobe_); \
   \
-  /* It's okay for the buffer to be NULL as long as the length is 0. */ \
-  env_->get_region_fn(a, 2, 0, NULL); \
+  /* It's okay for the buffer to be nullptr as long as the length is 0. */ \
+  env_->get_region_fn(a, 2, 0, nullptr); \
   /* Even if the offset is invalid... */ \
-  env_->get_region_fn(a, 123, 0, NULL); \
-  EXPECT_EXCEPTION(aioobe_); \
+  env_->get_region_fn(a, 123, 0, nullptr); \
+  ExpectException(aioobe_); \
   \
-  /* It's okay for the buffer to be NULL as long as the length is 0. */ \
-  env_->set_region_fn(a, 2, 0, NULL); \
+  /* It's okay for the buffer to be nullptr as long as the length is 0. */ \
+  env_->set_region_fn(a, 2, 0, nullptr); \
   /* Even if the offset is invalid... */ \
-  env_->set_region_fn(a, 123, 0, NULL); \
-  EXPECT_EXCEPTION(aioobe_); \
+  env_->set_region_fn(a, 123, 0, nullptr); \
+  ExpectException(aioobe_); \
   \
   /* Prepare a couple of buffers. */ \
-  UniquePtr<scalar_type[]> src_buf(new scalar_type[size]); \
-  UniquePtr<scalar_type[]> dst_buf(new scalar_type[size]); \
+  std::unique_ptr<scalar_type[]> src_buf(new scalar_type[size]); \
+  std::unique_ptr<scalar_type[]> dst_buf(new scalar_type[size]); \
   for (jsize i = 0; i < size; ++i) { src_buf[i] = scalar_type(i); } \
   for (jsize i = 0; i < size; ++i) { dst_buf[i] = scalar_type(-1); } \
   \
@@ -441,12 +560,12 @@
   EXPECT_EQ(memcmp(&src_buf[0], &dst_buf[0], size * sizeof(scalar_type)), 0) \
     << "full copy not equal"; \
   /* GetPrimitiveArrayCritical */ \
-  void* v = env_->GetPrimitiveArrayCritical(a, NULL); \
+  void* v = env_->GetPrimitiveArrayCritical(a, nullptr); \
   EXPECT_EQ(memcmp(&src_buf[0], v, size * sizeof(scalar_type)), 0) \
     << "GetPrimitiveArrayCritical not equal"; \
   env_->ReleasePrimitiveArrayCritical(a, v, 0); \
   /* GetXArrayElements */ \
-  scalar_type* xs = env_->get_elements_fn(a, NULL); \
+  scalar_type* xs = env_->get_elements_fn(a, nullptr); \
   EXPECT_EQ(memcmp(&src_buf[0], xs, size * sizeof(scalar_type)), 0) \
     << # get_elements_fn " not equal"; \
   env_->release_elements_fn(a, xs, 0); \
@@ -484,31 +603,206 @@
                          GetShortArrayElements, ReleaseShortArrayElements, jshort, "[S");
 }
 
+TEST_F(JniInternalTest, GetPrimitiveArrayElementsOfWrongType) {
+  CheckJniAbortCatcher jni_abort_catcher;
+  jbooleanArray array = env_->NewBooleanArray(10);
+  jboolean is_copy;
+  EXPECT_EQ(env_->GetByteArrayElements(reinterpret_cast<jbyteArray>(array), &is_copy), nullptr);
+  jni_abort_catcher.Check(
+      "attempt to get byte primitive array elements with an object of type boolean[]");
+  EXPECT_EQ(env_->GetShortArrayElements(reinterpret_cast<jshortArray>(array), &is_copy), nullptr);
+  jni_abort_catcher.Check(
+      "attempt to get short primitive array elements with an object of type boolean[]");
+  EXPECT_EQ(env_->GetCharArrayElements(reinterpret_cast<jcharArray>(array), &is_copy), nullptr);
+  jni_abort_catcher.Check(
+      "attempt to get char primitive array elements with an object of type boolean[]");
+  EXPECT_EQ(env_->GetIntArrayElements(reinterpret_cast<jintArray>(array), &is_copy), nullptr);
+  jni_abort_catcher.Check(
+      "attempt to get int primitive array elements with an object of type boolean[]");
+  EXPECT_EQ(env_->GetLongArrayElements(reinterpret_cast<jlongArray>(array), &is_copy), nullptr);
+  jni_abort_catcher.Check(
+      "attempt to get long primitive array elements with an object of type boolean[]");
+  EXPECT_EQ(env_->GetFloatArrayElements(reinterpret_cast<jfloatArray>(array), &is_copy), nullptr);
+  jni_abort_catcher.Check(
+      "attempt to get float primitive array elements with an object of type boolean[]");
+  EXPECT_EQ(env_->GetDoubleArrayElements(reinterpret_cast<jdoubleArray>(array), &is_copy), nullptr);
+  jni_abort_catcher.Check(
+      "attempt to get double primitive array elements with an object of type boolean[]");
+  jbyteArray array2 = env_->NewByteArray(10);
+  EXPECT_EQ(env_->GetBooleanArrayElements(reinterpret_cast<jbooleanArray>(array2), &is_copy), nullptr);
+  jni_abort_catcher.Check(
+      "attempt to get boolean primitive array elements with an object of type byte[]");
+  jobject object = env_->NewStringUTF("Test String");
+  EXPECT_EQ(env_->GetBooleanArrayElements(reinterpret_cast<jbooleanArray>(object), &is_copy), nullptr);
+  jni_abort_catcher.Check(
+      "attempt to get boolean primitive array elements with an object of type java.lang.String");
+}
+
+TEST_F(JniInternalTest, ReleasePrimitiveArrayElementsOfWrongType) {
+  CheckJniAbortCatcher jni_abort_catcher;
+  jbooleanArray array = env_->NewBooleanArray(10);
+  ASSERT_TRUE(array != nullptr);
+  jboolean is_copy;
+  jboolean* elements = env_->GetBooleanArrayElements(array, &is_copy);
+  ASSERT_TRUE(elements != nullptr);
+  env_->ReleaseByteArrayElements(reinterpret_cast<jbyteArray>(array),
+                                 reinterpret_cast<jbyte*>(elements), 0);
+  jni_abort_catcher.Check(
+      "attempt to release byte primitive array elements with an object of type boolean[]");
+  env_->ReleaseShortArrayElements(reinterpret_cast<jshortArray>(array),
+                                  reinterpret_cast<jshort*>(elements), 0);
+  jni_abort_catcher.Check(
+      "attempt to release short primitive array elements with an object of type boolean[]");
+  env_->ReleaseCharArrayElements(reinterpret_cast<jcharArray>(array),
+                                 reinterpret_cast<jchar*>(elements), 0);
+  jni_abort_catcher.Check(
+      "attempt to release char primitive array elements with an object of type boolean[]");
+  env_->ReleaseIntArrayElements(reinterpret_cast<jintArray>(array),
+                                reinterpret_cast<jint*>(elements), 0);
+  jni_abort_catcher.Check(
+      "attempt to release int primitive array elements with an object of type boolean[]");
+  env_->ReleaseLongArrayElements(reinterpret_cast<jlongArray>(array),
+                                 reinterpret_cast<jlong*>(elements), 0);
+  jni_abort_catcher.Check(
+      "attempt to release long primitive array elements with an object of type boolean[]");
+  env_->ReleaseFloatArrayElements(reinterpret_cast<jfloatArray>(array),
+                                  reinterpret_cast<jfloat*>(elements), 0);
+  jni_abort_catcher.Check(
+      "attempt to release float primitive array elements with an object of type boolean[]");
+  env_->ReleaseDoubleArrayElements(reinterpret_cast<jdoubleArray>(array),
+                                  reinterpret_cast<jdouble*>(elements), 0);
+  jni_abort_catcher.Check(
+      "attempt to release double primitive array elements with an object of type boolean[]");
+  jbyteArray array2 = env_->NewByteArray(10);
+  env_->ReleaseBooleanArrayElements(reinterpret_cast<jbooleanArray>(array2), elements, 0);
+  jni_abort_catcher.Check(
+      "attempt to release boolean primitive array elements with an object of type byte[]");
+  jobject object = env_->NewStringUTF("Test String");
+  env_->ReleaseBooleanArrayElements(reinterpret_cast<jbooleanArray>(object), elements, 0);
+  jni_abort_catcher.Check(
+      "attempt to release boolean primitive array elements with an object of type java.lang.String");
+}
+
+TEST_F(JniInternalTest, GetReleasePrimitiveArrayCriticalOfWrongType) {
+  CheckJniAbortCatcher jni_abort_catcher;
+  jobject object = env_->NewStringUTF("Test String");
+  jboolean is_copy;
+  void* elements = env_->GetPrimitiveArrayCritical(reinterpret_cast<jarray>(object), &is_copy);
+  jni_abort_catcher.Check("expected primitive array, given java.lang.String");
+  env_->ReleasePrimitiveArrayCritical(reinterpret_cast<jarray>(object), elements, 0);
+  jni_abort_catcher.Check("expected primitive array, given java.lang.String");
+}
+
+TEST_F(JniInternalTest, GetPrimitiveArrayRegionElementsOfWrongType) {
+  CheckJniAbortCatcher jni_abort_catcher;
+  constexpr size_t kLength = 10;
+  jbooleanArray array = env_->NewBooleanArray(kLength);
+  ASSERT_TRUE(array != nullptr);
+  jboolean elements[kLength];
+  env_->GetByteArrayRegion(reinterpret_cast<jbyteArray>(array), 0, kLength,
+                           reinterpret_cast<jbyte*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to get region of byte primitive array elements with an object of type boolean[]");
+  env_->GetShortArrayRegion(reinterpret_cast<jshortArray>(array), 0, kLength,
+                            reinterpret_cast<jshort*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to get region of short primitive array elements with an object of type boolean[]");
+  env_->GetCharArrayRegion(reinterpret_cast<jcharArray>(array), 0, kLength,
+                           reinterpret_cast<jchar*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to get region of char primitive array elements with an object of type boolean[]");
+  env_->GetIntArrayRegion(reinterpret_cast<jintArray>(array), 0, kLength,
+                          reinterpret_cast<jint*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to get region of int primitive array elements with an object of type boolean[]");
+  env_->GetLongArrayRegion(reinterpret_cast<jlongArray>(array), 0, kLength,
+                           reinterpret_cast<jlong*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to get region of long primitive array elements with an object of type boolean[]");
+  env_->GetFloatArrayRegion(reinterpret_cast<jfloatArray>(array), 0, kLength,
+                            reinterpret_cast<jfloat*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to get region of float primitive array elements with an object of type boolean[]");
+  env_->GetDoubleArrayRegion(reinterpret_cast<jdoubleArray>(array), 0, kLength,
+                           reinterpret_cast<jdouble*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to get region of double primitive array elements with an object of type boolean[]");
+  jbyteArray array2 = env_->NewByteArray(10);
+  env_->GetBooleanArrayRegion(reinterpret_cast<jbooleanArray>(array2), 0, kLength,
+                              reinterpret_cast<jboolean*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to get region of boolean primitive array elements with an object of type byte[]");
+  jobject object = env_->NewStringUTF("Test String");
+  env_->GetBooleanArrayRegion(reinterpret_cast<jbooleanArray>(object), 0, kLength,
+                              reinterpret_cast<jboolean*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to get region of boolean primitive array elements with an object of type java.lang.String");
+}
+
+TEST_F(JniInternalTest, SetPrimitiveArrayRegionElementsOfWrongType) {
+  CheckJniAbortCatcher jni_abort_catcher;
+  constexpr size_t kLength = 10;
+  jbooleanArray array = env_->NewBooleanArray(kLength);
+  ASSERT_TRUE(array != nullptr);
+  jboolean elements[kLength];
+  env_->SetByteArrayRegion(reinterpret_cast<jbyteArray>(array), 0, kLength,
+                           reinterpret_cast<jbyte*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to set region of byte primitive array elements with an object of type boolean[]");
+  env_->SetShortArrayRegion(reinterpret_cast<jshortArray>(array), 0, kLength,
+                            reinterpret_cast<jshort*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to set region of short primitive array elements with an object of type boolean[]");
+  env_->SetCharArrayRegion(reinterpret_cast<jcharArray>(array), 0, kLength,
+                           reinterpret_cast<jchar*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to set region of char primitive array elements with an object of type boolean[]");
+  env_->SetIntArrayRegion(reinterpret_cast<jintArray>(array), 0, kLength,
+                          reinterpret_cast<jint*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to set region of int primitive array elements with an object of type boolean[]");
+  env_->SetLongArrayRegion(reinterpret_cast<jlongArray>(array), 0, kLength,
+                           reinterpret_cast<jlong*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to set region of long primitive array elements with an object of type boolean[]");
+  env_->SetFloatArrayRegion(reinterpret_cast<jfloatArray>(array), 0, kLength,
+                            reinterpret_cast<jfloat*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to set region of float primitive array elements with an object of type boolean[]");
+  env_->SetDoubleArrayRegion(reinterpret_cast<jdoubleArray>(array), 0, kLength,
+                           reinterpret_cast<jdouble*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to set region of double primitive array elements with an object of type boolean[]");
+  jbyteArray array2 = env_->NewByteArray(10);
+  env_->SetBooleanArrayRegion(reinterpret_cast<jbooleanArray>(array2), 0, kLength,
+                              reinterpret_cast<jboolean*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to set region of boolean primitive array elements with an object of type byte[]");
+  jobject object = env_->NewStringUTF("Test String");
+  env_->SetBooleanArrayRegion(reinterpret_cast<jbooleanArray>(object), 0, kLength,
+                              reinterpret_cast<jboolean*>(elements));
+  jni_abort_catcher.Check(
+      "attempt to set region of boolean primitive array elements with an object of type java.lang.String");
+}
+
 TEST_F(JniInternalTest, NewObjectArray) {
   jclass element_class = env_->FindClass("java/lang/String");
-  ASSERT_TRUE(element_class != nullptr);
+  ASSERT_NE(element_class, nullptr);
   jclass array_class = env_->FindClass("[Ljava/lang/String;");
-  ASSERT_TRUE(array_class != nullptr);
+  ASSERT_NE(array_class, nullptr);
 
   jobjectArray a = env_->NewObjectArray(0, element_class, nullptr);
-  EXPECT_TRUE(a != nullptr);
+  EXPECT_NE(a, nullptr);
   EXPECT_TRUE(env_->IsInstanceOf(a, array_class));
   EXPECT_EQ(0, env_->GetArrayLength(a));
 
   a = env_->NewObjectArray(1, element_class, nullptr);
-  EXPECT_TRUE(a != nullptr);
+  EXPECT_NE(a, nullptr);
   EXPECT_TRUE(env_->IsInstanceOf(a, array_class));
   EXPECT_EQ(1, env_->GetArrayLength(a));
   EXPECT_TRUE(env_->IsSameObject(env_->GetObjectArrayElement(a, 0), nullptr));
-}
 
-TEST_F(JniInternalTest, NewObjectArrayWithNegativeLength) {
-  jclass element_class = env_->FindClass("java/lang/String");
-  ASSERT_TRUE(element_class != nullptr);
-  jclass array_class = env_->FindClass("[Ljava/lang/String;");
-  ASSERT_TRUE(array_class != nullptr);
+  // Negative array length checks.
   CheckJniAbortCatcher jni_abort_catcher;
-
   env_->NewObjectArray(-1, element_class, nullptr);
   jni_abort_catcher.Check("negative array length: -1");
 
@@ -525,6 +819,8 @@
 
   CheckJniAbortCatcher jni_abort_catcher;
   for (size_t i = 0; i < strlen(primitive_descriptors); ++i) {
+    env_->NewObjectArray(0, nullptr, nullptr);
+    jni_abort_catcher.Check("element_jclass == null");
     jclass primitive_class = GetPrimitiveClass(primitive_descriptors[i]);
     env_->NewObjectArray(1, primitive_class, nullptr);
     std::string error_msg(StringPrintf("not an object type: %s", primitive_names[i]));
@@ -534,13 +830,13 @@
 
 TEST_F(JniInternalTest, NewObjectArrayWithInitialValue) {
   jclass element_class = env_->FindClass("java/lang/String");
-  ASSERT_TRUE(element_class != nullptr);
+  ASSERT_NE(element_class, nullptr);
   jclass array_class = env_->FindClass("[Ljava/lang/String;");
-  ASSERT_TRUE(array_class != nullptr);
+  ASSERT_NE(array_class, nullptr);
 
   jstring s = env_->NewStringUTF("poop");
   jobjectArray a = env_->NewObjectArray(2, element_class, s);
-  EXPECT_TRUE(a != nullptr);
+  EXPECT_NE(a, nullptr);
   EXPECT_TRUE(env_->IsInstanceOf(a, array_class));
   EXPECT_EQ(2, env_->GetArrayLength(a));
   EXPECT_TRUE(env_->IsSameObject(env_->GetObjectArrayElement(a, 0), s));
@@ -559,9 +855,9 @@
 
 TEST_F(JniInternalTest, GetObjectClass) {
   jclass string_class = env_->FindClass("java/lang/String");
-  ASSERT_TRUE(string_class != NULL);
+  ASSERT_NE(string_class, nullptr);
   jclass class_class = env_->FindClass("java/lang/Class");
-  ASSERT_TRUE(class_class != NULL);
+  ASSERT_NE(class_class, nullptr);
 
   jstring s = env_->NewStringUTF("poop");
   jclass c = env_->GetObjectClass(s);
@@ -569,33 +865,50 @@
 
   jclass c2 = env_->GetObjectClass(c);
   ASSERT_TRUE(env_->IsSameObject(class_class, env_->GetObjectClass(c2)));
+
+  // Null as object should fail.
+  CheckJniAbortCatcher jni_abort_catcher;
+  EXPECT_EQ(env_->GetObjectClass(nullptr), nullptr);
+  jni_abort_catcher.Check("java_object == null");
 }
 
 TEST_F(JniInternalTest, GetSuperclass) {
   jclass object_class = env_->FindClass("java/lang/Object");
-  ASSERT_TRUE(object_class != NULL);
+  ASSERT_NE(object_class, nullptr);
   jclass string_class = env_->FindClass("java/lang/String");
-  ASSERT_TRUE(string_class != NULL);
+  ASSERT_NE(string_class, nullptr);
   jclass runnable_interface = env_->FindClass("java/lang/Runnable");
-  ASSERT_TRUE(runnable_interface != NULL);
+  ASSERT_NE(runnable_interface, nullptr);
   ASSERT_TRUE(env_->IsSameObject(object_class, env_->GetSuperclass(string_class)));
-  ASSERT_TRUE(env_->GetSuperclass(object_class) == NULL);
+  ASSERT_EQ(env_->GetSuperclass(object_class), nullptr);
   ASSERT_TRUE(env_->IsSameObject(object_class, env_->GetSuperclass(runnable_interface)));
+
+  // Null as class should fail.
+  CheckJniAbortCatcher jni_abort_catcher;
+  EXPECT_EQ(env_->GetSuperclass(nullptr), nullptr);
+  jni_abort_catcher.Check("java_class == null");
 }
 
 TEST_F(JniInternalTest, IsAssignableFrom) {
   jclass object_class = env_->FindClass("java/lang/Object");
-  ASSERT_TRUE(object_class != NULL);
+  ASSERT_NE(object_class, nullptr);
   jclass string_class = env_->FindClass("java/lang/String");
-  ASSERT_TRUE(string_class != NULL);
+  ASSERT_NE(string_class, nullptr);
 
   ASSERT_TRUE(env_->IsAssignableFrom(object_class, string_class));
   ASSERT_FALSE(env_->IsAssignableFrom(string_class, object_class));
+
+  // Null as either class should fail.
+  CheckJniAbortCatcher jni_abort_catcher;
+  EXPECT_EQ(env_->IsAssignableFrom(nullptr, string_class), JNI_FALSE);
+  jni_abort_catcher.Check("java_class1 == null");
+  EXPECT_EQ(env_->IsAssignableFrom(object_class, nullptr), JNI_FALSE);
+  jni_abort_catcher.Check("java_class2 == null");
 }
 
 TEST_F(JniInternalTest, GetObjectRefType) {
   jclass local = env_->FindClass("java/lang/Object");
-  ASSERT_TRUE(local != NULL);
+  ASSERT_NE(local, nullptr);
   EXPECT_EQ(JNILocalRefType, env_->GetObjectRefType(local));
 
   jobject global = env_->NewGlobalRef(local);
@@ -608,33 +921,38 @@
   EXPECT_EQ(JNIInvalidRefType, env_->GetObjectRefType(invalid));
 
   // TODO: invoke a native method and test that its arguments are considered local references.
+
+  // Null as object should fail.
+  CheckJniAbortCatcher jni_abort_catcher;
+  EXPECT_EQ(JNIInvalidRefType, env_->GetObjectRefType(nullptr));
+  jni_abort_catcher.Check("java_object == null");
 }
 
 TEST_F(JniInternalTest, StaleWeakGlobal) {
   jclass java_lang_Class = env_->FindClass("java/lang/Class");
-  ASSERT_TRUE(java_lang_Class != NULL);
-  jobjectArray local_ref = env_->NewObjectArray(1, java_lang_Class, NULL);
-  ASSERT_TRUE(local_ref != NULL);
+  ASSERT_NE(java_lang_Class, nullptr);
+  jobjectArray local_ref = env_->NewObjectArray(1, java_lang_Class, nullptr);
+  ASSERT_NE(local_ref, nullptr);
   jweak weak_global = env_->NewWeakGlobalRef(local_ref);
-  ASSERT_TRUE(weak_global != NULL);
+  ASSERT_NE(weak_global, nullptr);
   env_->DeleteLocalRef(local_ref);
   Runtime::Current()->GetHeap()->CollectGarbage(false);  // GC should clear the weak global.
   jobject new_global_ref = env_->NewGlobalRef(weak_global);
-  EXPECT_TRUE(new_global_ref == NULL);
+  EXPECT_EQ(new_global_ref, nullptr);
   jobject new_local_ref = env_->NewLocalRef(weak_global);
-  EXPECT_TRUE(new_local_ref == NULL);
+  EXPECT_EQ(new_local_ref, nullptr);
 }
 
 TEST_F(JniInternalTest, NewStringUTF) {
-  EXPECT_TRUE(env_->NewStringUTF(NULL) == NULL);
+  EXPECT_EQ(env_->NewStringUTF(nullptr), nullptr);
   jstring s;
 
   s = env_->NewStringUTF("");
-  EXPECT_TRUE(s != NULL);
+  EXPECT_NE(s, nullptr);
   EXPECT_EQ(0, env_->GetStringLength(s));
   EXPECT_EQ(0, env_->GetStringUTFLength(s));
   s = env_->NewStringUTF("hello");
-  EXPECT_TRUE(s != NULL);
+  EXPECT_NE(s, nullptr);
   EXPECT_EQ(5, env_->GetStringLength(s));
   EXPECT_EQ(5, env_->GetStringUTFLength(s));
 
@@ -645,11 +963,11 @@
   jchar chars[] = { 'h', 'i' };
   jstring s;
   s = env_->NewString(chars, 0);
-  EXPECT_TRUE(s != NULL);
+  EXPECT_NE(s, nullptr);
   EXPECT_EQ(0, env_->GetStringLength(s));
   EXPECT_EQ(0, env_->GetStringUTFLength(s));
   s = env_->NewString(chars, 2);
-  EXPECT_TRUE(s != NULL);
+  EXPECT_NE(s, nullptr);
   EXPECT_EQ(2, env_->GetStringLength(s));
   EXPECT_EQ(2, env_->GetStringUTFLength(s));
 
@@ -658,7 +976,7 @@
 
 TEST_F(JniInternalTest, NewStringNullCharsZeroLength) {
   jstring s = env_->NewString(nullptr, 0);
-  EXPECT_TRUE(s != nullptr);
+  EXPECT_NE(s, nullptr);
   EXPECT_EQ(0, env_->GetStringLength(s));
 }
 
@@ -682,16 +1000,16 @@
 
 TEST_F(JniInternalTest, GetStringRegion_GetStringUTFRegion) {
   jstring s = env_->NewStringUTF("hello");
-  ASSERT_TRUE(s != NULL);
+  ASSERT_NE(s, nullptr);
 
-  env_->GetStringRegion(s, -1, 0, NULL);
-  EXPECT_EXCEPTION(sioobe_);
-  env_->GetStringRegion(s, 0, -1, NULL);
-  EXPECT_EXCEPTION(sioobe_);
-  env_->GetStringRegion(s, 0, 10, NULL);
-  EXPECT_EXCEPTION(sioobe_);
-  env_->GetStringRegion(s, 10, 1, NULL);
-  EXPECT_EXCEPTION(sioobe_);
+  env_->GetStringRegion(s, -1, 0, nullptr);
+  ExpectException(sioobe_);
+  env_->GetStringRegion(s, 0, -1, nullptr);
+  ExpectException(sioobe_);
+  env_->GetStringRegion(s, 0, 10, nullptr);
+  ExpectException(sioobe_);
+  env_->GetStringRegion(s, 10, 1, nullptr);
+  ExpectException(sioobe_);
 
   jchar chars[4] = { 'x', 'x', 'x', 'x' };
   env_->GetStringRegion(s, 1, 2, &chars[1]);
@@ -700,20 +1018,20 @@
   EXPECT_EQ('l', chars[2]);
   EXPECT_EQ('x', chars[3]);
 
-  // It's okay for the buffer to be NULL as long as the length is 0.
-  env_->GetStringRegion(s, 2, 0, NULL);
+  // It's okay for the buffer to be nullptr as long as the length is 0.
+  env_->GetStringRegion(s, 2, 0, nullptr);
   // Even if the offset is invalid...
-  env_->GetStringRegion(s, 123, 0, NULL);
-  EXPECT_EXCEPTION(sioobe_);
+  env_->GetStringRegion(s, 123, 0, nullptr);
+  ExpectException(sioobe_);
 
-  env_->GetStringUTFRegion(s, -1, 0, NULL);
-  EXPECT_EXCEPTION(sioobe_);
-  env_->GetStringUTFRegion(s, 0, -1, NULL);
-  EXPECT_EXCEPTION(sioobe_);
-  env_->GetStringUTFRegion(s, 0, 10, NULL);
-  EXPECT_EXCEPTION(sioobe_);
-  env_->GetStringUTFRegion(s, 10, 1, NULL);
-  EXPECT_EXCEPTION(sioobe_);
+  env_->GetStringUTFRegion(s, -1, 0, nullptr);
+  ExpectException(sioobe_);
+  env_->GetStringUTFRegion(s, 0, -1, nullptr);
+  ExpectException(sioobe_);
+  env_->GetStringUTFRegion(s, 0, 10, nullptr);
+  ExpectException(sioobe_);
+  env_->GetStringUTFRegion(s, 10, 1, nullptr);
+  ExpectException(sioobe_);
 
   char bytes[4] = { 'x', 'x', 'x', 'x' };
   env_->GetStringUTFRegion(s, 1, 2, &bytes[1]);
@@ -722,25 +1040,25 @@
   EXPECT_EQ('l', bytes[2]);
   EXPECT_EQ('x', bytes[3]);
 
-  // It's okay for the buffer to be NULL as long as the length is 0.
-  env_->GetStringUTFRegion(s, 2, 0, NULL);
+  // It's okay for the buffer to be nullptr as long as the length is 0.
+  env_->GetStringUTFRegion(s, 2, 0, nullptr);
   // Even if the offset is invalid...
-  env_->GetStringUTFRegion(s, 123, 0, NULL);
-  EXPECT_EXCEPTION(sioobe_);
+  env_->GetStringUTFRegion(s, 123, 0, nullptr);
+  ExpectException(sioobe_);
 }
 
 TEST_F(JniInternalTest, GetStringUTFChars_ReleaseStringUTFChars) {
-  // Passing in a NULL jstring is ignored normally, but caught by -Xcheck:jni.
+  // Passing in a nullptr jstring is ignored normally, but caught by -Xcheck:jni.
   {
     CheckJniAbortCatcher check_jni_abort_catcher;
-    EXPECT_TRUE(env_->GetStringUTFChars(NULL, NULL) == NULL);
+    EXPECT_EQ(env_->GetStringUTFChars(nullptr, nullptr), nullptr);
     check_jni_abort_catcher.Check("GetStringUTFChars received null jstring");
   }
 
   jstring s = env_->NewStringUTF("hello");
-  ASSERT_TRUE(s != NULL);
+  ASSERT_NE(s, nullptr);
 
-  const char* utf = env_->GetStringUTFChars(s, NULL);
+  const char* utf = env_->GetStringUTFChars(s, nullptr);
   EXPECT_STREQ("hello", utf);
   env_->ReleaseStringUTFChars(s, utf);
 
@@ -753,10 +1071,10 @@
 
 TEST_F(JniInternalTest, GetStringChars_ReleaseStringChars) {
   jstring s = env_->NewStringUTF("hello");
-  ASSERT_TRUE(s != NULL);
+  ASSERT_NE(s, nullptr);
 
   jchar expected[] = { 'h', 'e', 'l', 'l', 'o' };
-  const jchar* chars = env_->GetStringChars(s, NULL);
+  const jchar* chars = env_->GetStringChars(s, nullptr);
   EXPECT_EQ(expected[0], chars[0]);
   EXPECT_EQ(expected[1], chars[1]);
   EXPECT_EQ(expected[2], chars[2]);
@@ -777,10 +1095,10 @@
 
 TEST_F(JniInternalTest, GetStringCritical_ReleaseStringCritical) {
   jstring s = env_->NewStringUTF("hello");
-  ASSERT_TRUE(s != NULL);
+  ASSERT_NE(s, nullptr);
 
   jchar expected[] = { 'h', 'e', 'l', 'l', 'o' };
-  const jchar* chars = env_->GetStringCritical(s, NULL);
+  const jchar* chars = env_->GetStringCritical(s, nullptr);
   EXPECT_EQ(expected[0], chars[0]);
   EXPECT_EQ(expected[1], chars[1]);
   EXPECT_EQ(expected[2], chars[2]);
@@ -802,45 +1120,72 @@
 
 TEST_F(JniInternalTest, GetObjectArrayElement_SetObjectArrayElement) {
   jclass java_lang_Class = env_->FindClass("java/lang/Class");
-  ASSERT_TRUE(java_lang_Class != NULL);
+  ASSERT_NE(java_lang_Class, nullptr);
 
-  jobjectArray array = env_->NewObjectArray(1, java_lang_Class, NULL);
-  EXPECT_TRUE(array != NULL);
-  EXPECT_TRUE(env_->GetObjectArrayElement(array, 0) == NULL);
+  jobjectArray array = env_->NewObjectArray(1, java_lang_Class, nullptr);
+  EXPECT_NE(array, nullptr);
+  EXPECT_EQ(env_->GetObjectArrayElement(array, 0), nullptr);
   env_->SetObjectArrayElement(array, 0, java_lang_Class);
   EXPECT_TRUE(env_->IsSameObject(env_->GetObjectArrayElement(array, 0), java_lang_Class));
 
   // ArrayIndexOutOfBounds for negative index.
   env_->SetObjectArrayElement(array, -1, java_lang_Class);
-  EXPECT_EXCEPTION(aioobe_);
+  ExpectException(aioobe_);
 
   // ArrayIndexOutOfBounds for too-large index.
   env_->SetObjectArrayElement(array, 1, java_lang_Class);
-  EXPECT_EXCEPTION(aioobe_);
+  ExpectException(aioobe_);
 
   // ArrayStoreException thrown for bad types.
   env_->SetObjectArrayElement(array, 0, env_->NewStringUTF("not a jclass!"));
-  EXPECT_EXCEPTION(ase_);
+  ExpectException(ase_);
+
+  // Null as array should fail.
+  CheckJniAbortCatcher jni_abort_catcher;
+  EXPECT_EQ(nullptr, env_->GetObjectArrayElement(nullptr, 0));
+  jni_abort_catcher.Check("java_array == null");
+  env_->SetObjectArrayElement(nullptr, 0, nullptr);
+  jni_abort_catcher.Check("java_array == null");
 }
 
 #define EXPECT_STATIC_PRIMITIVE_FIELD(type, field_name, sig, value1, value2) \
   do { \
     jfieldID fid = env_->GetStaticFieldID(c, field_name, sig); \
-    EXPECT_TRUE(fid != NULL); \
+    EXPECT_NE(fid, nullptr); \
     env_->SetStatic ## type ## Field(c, fid, value1); \
-    EXPECT_TRUE(value1 == env_->GetStatic ## type ## Field(c, fid)); \
+    EXPECT_EQ(value1, env_->GetStatic ## type ## Field(c, fid)); \
     env_->SetStatic ## type ## Field(c, fid, value2); \
-    EXPECT_TRUE(value2 == env_->GetStatic ## type ## Field(c, fid)); \
+    EXPECT_EQ(value2, env_->GetStatic ## type ## Field(c, fid)); \
+    \
+    CheckJniAbortCatcher jni_abort_catcher; \
+    env_->GetStatic ## type ## Field(nullptr, fid); \
+    jni_abort_catcher.Check("received null jclass"); \
+    env_->SetStatic ## type ## Field(nullptr, fid, value1); \
+    jni_abort_catcher.Check("received null jclass"); \
+    env_->GetStatic ## type ## Field(c, nullptr); \
+    jni_abort_catcher.Check("fid == null"); \
+    env_->SetStatic ## type ## Field(c, nullptr, value1); \
+    jni_abort_catcher.Check("fid == null"); \
   } while (false)
 
 #define EXPECT_PRIMITIVE_FIELD(instance, type, field_name, sig, value1, value2) \
   do { \
     jfieldID fid = env_->GetFieldID(c, field_name, sig); \
-    EXPECT_TRUE(fid != NULL); \
+    EXPECT_NE(fid, nullptr); \
     env_->Set ## type ## Field(instance, fid, value1); \
-    EXPECT_TRUE(value1 == env_->Get ## type ## Field(instance, fid)); \
+    EXPECT_EQ(value1, env_->Get ## type ## Field(instance, fid)); \
     env_->Set ## type ## Field(instance, fid, value2); \
-    EXPECT_TRUE(value2 == env_->Get ## type ## Field(instance, fid)); \
+    EXPECT_EQ(value2, env_->Get ## type ## Field(instance, fid)); \
+    \
+    CheckJniAbortCatcher jni_abort_catcher; \
+    env_->Get ## type ## Field(nullptr, fid); \
+    jni_abort_catcher.Check("obj == null"); \
+    env_->Set ## type ## Field(nullptr, fid, value1); \
+    jni_abort_catcher.Check("obj == null"); \
+    env_->Get ## type ## Field(instance, nullptr); \
+    jni_abort_catcher.Check("fid == null"); \
+    env_->Set ## type ## Field(instance, nullptr, value1); \
+    jni_abort_catcher.Check("fid == null"); \
   } while (false)
 
 
@@ -849,14 +1194,14 @@
   Thread::Current()->TransitionFromSuspendedToRunnable();
   LoadDex("AllFields");
   bool started = runtime_->Start();
-  CHECK(started);
+  ASSERT_TRUE(started);
 
   jclass c = env_->FindClass("AllFields");
-  ASSERT_TRUE(c != NULL);
+  ASSERT_NE(c, nullptr);
   jobject o = env_->AllocObject(c);
-  ASSERT_TRUE(o != NULL);
+  ASSERT_NE(o, nullptr);
 
-  EXPECT_STATIC_PRIMITIVE_FIELD(Boolean, "sZ", "Z", true, false);
+  EXPECT_STATIC_PRIMITIVE_FIELD(Boolean, "sZ", "Z", JNI_TRUE, JNI_FALSE);
   EXPECT_STATIC_PRIMITIVE_FIELD(Byte, "sB", "B", 1, 2);
   EXPECT_STATIC_PRIMITIVE_FIELD(Char, "sC", "C", 'a', 'b');
   EXPECT_STATIC_PRIMITIVE_FIELD(Double, "sD", "D", 1.0, 2.0);
@@ -865,7 +1210,7 @@
   EXPECT_STATIC_PRIMITIVE_FIELD(Long, "sJ", "J", 1, 2);
   EXPECT_STATIC_PRIMITIVE_FIELD(Short, "sS", "S", 1, 2);
 
-  EXPECT_PRIMITIVE_FIELD(o, Boolean, "iZ", "Z", true, false);
+  EXPECT_PRIMITIVE_FIELD(o, Boolean, "iZ", "Z", JNI_TRUE, JNI_FALSE);
   EXPECT_PRIMITIVE_FIELD(o, Byte, "iB", "B", 1, 2);
   EXPECT_PRIMITIVE_FIELD(o, Char, "iC", "C", 'a', 'b');
   EXPECT_PRIMITIVE_FIELD(o, Double, "iD", "D", 1.0, 2.0);
@@ -882,19 +1227,19 @@
   runtime_->Start();
 
   jclass c = env_->FindClass("AllFields");
-  ASSERT_TRUE(c != NULL);
+  ASSERT_NE(c, nullptr);
   jobject o = env_->AllocObject(c);
-  ASSERT_TRUE(o != NULL);
+  ASSERT_NE(o, nullptr);
 
   jstring s1 = env_->NewStringUTF("hello");
-  ASSERT_TRUE(s1 != NULL);
+  ASSERT_NE(s1, nullptr);
   jstring s2 = env_->NewStringUTF("world");
-  ASSERT_TRUE(s2 != NULL);
+  ASSERT_NE(s2, nullptr);
 
   jfieldID s_fid = env_->GetStaticFieldID(c, "sObject", "Ljava/lang/Object;");
-  ASSERT_TRUE(s_fid != NULL);
+  ASSERT_NE(s_fid, nullptr);
   jfieldID i_fid = env_->GetFieldID(c, "iObject", "Ljava/lang/Object;");
-  ASSERT_TRUE(i_fid != NULL);
+  ASSERT_NE(i_fid, nullptr);
 
   env_->SetStaticObjectField(c, s_fid, s1);
   ASSERT_TRUE(env_->IsSameObject(s1, env_->GetStaticObjectField(c, s_fid)));
@@ -907,27 +1252,27 @@
   ASSERT_TRUE(env_->IsSameObject(s2, env_->GetObjectField(o, i_fid)));
 }
 
-TEST_F(JniInternalTest, NewLocalRef_NULL) {
-  EXPECT_TRUE(env_->NewLocalRef(NULL) == NULL);
+TEST_F(JniInternalTest, NewLocalRef_nullptr) {
+  EXPECT_EQ(env_->NewLocalRef(nullptr), nullptr);
 }
 
 TEST_F(JniInternalTest, NewLocalRef) {
   jstring s = env_->NewStringUTF("");
-  ASSERT_TRUE(s != NULL);
+  ASSERT_NE(s, nullptr);
   jobject o = env_->NewLocalRef(s);
-  EXPECT_TRUE(o != NULL);
-  EXPECT_TRUE(o != s);
+  EXPECT_NE(o, nullptr);
+  EXPECT_NE(o, s);
 
   EXPECT_EQ(JNILocalRefType, env_->GetObjectRefType(o));
 }
 
-TEST_F(JniInternalTest, DeleteLocalRef_NULL) {
-  env_->DeleteLocalRef(NULL);
+TEST_F(JniInternalTest, DeleteLocalRef_nullptr) {
+  env_->DeleteLocalRef(nullptr);
 }
 
 TEST_F(JniInternalTest, DeleteLocalRef) {
   jstring s = env_->NewStringUTF("");
-  ASSERT_TRUE(s != NULL);
+  ASSERT_NE(s, nullptr);
   env_->DeleteLocalRef(s);
 
   // Currently, deleting an already-deleted reference is just a CheckJNI warning.
@@ -941,9 +1286,9 @@
   }
 
   s = env_->NewStringUTF("");
-  ASSERT_TRUE(s != NULL);
+  ASSERT_NE(s, nullptr);
   jobject o = env_->NewLocalRef(s);
-  ASSERT_TRUE(o != NULL);
+  ASSERT_NE(o, nullptr);
 
   env_->DeleteLocalRef(s);
   env_->DeleteLocalRef(o);
@@ -955,7 +1300,7 @@
   // Android historically treated it, and it's how the RI treats it. It's also the more useful
   // interpretation!
   ASSERT_EQ(JNI_OK, env_->PushLocalFrame(0));
-  env_->PopLocalFrame(NULL);
+  env_->PopLocalFrame(nullptr);
 
   // Negative capacities are not allowed.
   ASSERT_EQ(JNI_ERR, env_->PushLocalFrame(-1));
@@ -966,7 +1311,7 @@
 
 TEST_F(JniInternalTest, PushLocalFrame_PopLocalFrame) {
   jobject original = env_->NewStringUTF("");
-  ASSERT_TRUE(original != NULL);
+  ASSERT_NE(original, nullptr);
 
   jobject outer;
   jobject inner1, inner2;
@@ -991,11 +1336,8 @@
     // Our local reference for the survivor is invalid because the survivor
     // gets a new local reference...
     EXPECT_EQ(JNIInvalidRefType, env_->GetObjectRefType(inner2));
-    // ...but the survivor should be in the local reference table.
-    JNIEnvExt* env = reinterpret_cast<JNIEnvExt*>(env_);
-    EXPECT_TRUE(env->locals.ContainsDirectPointer(inner2_direct_pointer));
 
-    env_->PopLocalFrame(NULL);
+    env_->PopLocalFrame(nullptr);
   }
   EXPECT_EQ(JNILocalRefType, env_->GetObjectRefType(original));
   EXPECT_EQ(JNIInvalidRefType, env_->GetObjectRefType(outer));
@@ -1003,30 +1345,30 @@
   EXPECT_EQ(JNIInvalidRefType, env_->GetObjectRefType(inner2));
 }
 
-TEST_F(JniInternalTest, NewGlobalRef_NULL) {
-  EXPECT_TRUE(env_->NewGlobalRef(NULL) == NULL);
+TEST_F(JniInternalTest, NewGlobalRef_nullptr) {
+  EXPECT_EQ(env_->NewGlobalRef(nullptr), nullptr);
 }
 
 TEST_F(JniInternalTest, NewGlobalRef) {
   jstring s = env_->NewStringUTF("");
-  ASSERT_TRUE(s != NULL);
+  ASSERT_NE(s, nullptr);
   jobject o = env_->NewGlobalRef(s);
-  EXPECT_TRUE(o != NULL);
-  EXPECT_TRUE(o != s);
+  EXPECT_NE(o, nullptr);
+  EXPECT_NE(o, s);
 
-  // TODO: check that o is a global reference.
+  EXPECT_EQ(env_->GetObjectRefType(o), JNIGlobalRefType);
 }
 
-TEST_F(JniInternalTest, DeleteGlobalRef_NULL) {
-  env_->DeleteGlobalRef(NULL);
+TEST_F(JniInternalTest, DeleteGlobalRef_nullptr) {
+  env_->DeleteGlobalRef(nullptr);
 }
 
 TEST_F(JniInternalTest, DeleteGlobalRef) {
   jstring s = env_->NewStringUTF("");
-  ASSERT_TRUE(s != NULL);
+  ASSERT_NE(s, nullptr);
 
   jobject o = env_->NewGlobalRef(s);
-  ASSERT_TRUE(o != NULL);
+  ASSERT_NE(o, nullptr);
   env_->DeleteGlobalRef(o);
 
   // Currently, deleting an already-deleted reference is just a CheckJNI warning.
@@ -1040,38 +1382,38 @@
   }
 
   jobject o1 = env_->NewGlobalRef(s);
-  ASSERT_TRUE(o1 != NULL);
+  ASSERT_NE(o1, nullptr);
   jobject o2 = env_->NewGlobalRef(s);
-  ASSERT_TRUE(o2 != NULL);
+  ASSERT_NE(o2, nullptr);
 
   env_->DeleteGlobalRef(o1);
   env_->DeleteGlobalRef(o2);
 }
 
-TEST_F(JniInternalTest, NewWeakGlobalRef_NULL) {
-  EXPECT_TRUE(env_->NewWeakGlobalRef(NULL) == NULL);
+TEST_F(JniInternalTest, NewWeakGlobalRef_nullptr) {
+  EXPECT_EQ(env_->NewWeakGlobalRef(nullptr), nullptr);
 }
 
 TEST_F(JniInternalTest, NewWeakGlobalRef) {
   jstring s = env_->NewStringUTF("");
-  ASSERT_TRUE(s != NULL);
+  ASSERT_NE(s, nullptr);
   jobject o = env_->NewWeakGlobalRef(s);
-  EXPECT_TRUE(o != NULL);
-  EXPECT_TRUE(o != s);
+  EXPECT_NE(o, nullptr);
+  EXPECT_NE(o, s);
 
-  // TODO: check that o is a weak global reference.
+  EXPECT_EQ(env_->GetObjectRefType(o), JNIWeakGlobalRefType);
 }
 
-TEST_F(JniInternalTest, DeleteWeakGlobalRef_NULL) {
-  env_->DeleteWeakGlobalRef(NULL);
+TEST_F(JniInternalTest, DeleteWeakGlobalRef_nullptr) {
+  env_->DeleteWeakGlobalRef(nullptr);
 }
 
 TEST_F(JniInternalTest, DeleteWeakGlobalRef) {
   jstring s = env_->NewStringUTF("");
-  ASSERT_TRUE(s != NULL);
+  ASSERT_NE(s, nullptr);
 
   jobject o = env_->NewWeakGlobalRef(s);
-  ASSERT_TRUE(o != NULL);
+  ASSERT_NE(o, nullptr);
   env_->DeleteWeakGlobalRef(o);
 
   // Currently, deleting an already-deleted reference is just a CheckJNI warning.
@@ -1085,21 +1427,21 @@
   }
 
   jobject o1 = env_->NewWeakGlobalRef(s);
-  ASSERT_TRUE(o1 != NULL);
+  ASSERT_NE(o1, nullptr);
   jobject o2 = env_->NewWeakGlobalRef(s);
-  ASSERT_TRUE(o2 != NULL);
+  ASSERT_NE(o2, nullptr);
 
   env_->DeleteWeakGlobalRef(o1);
   env_->DeleteWeakGlobalRef(o2);
 }
 
 TEST_F(JniInternalTest, Throw) {
-  EXPECT_EQ(JNI_ERR, env_->Throw(NULL));
+  EXPECT_EQ(JNI_ERR, env_->Throw(nullptr));
 
   jclass exception_class = env_->FindClass("java/lang/RuntimeException");
-  ASSERT_TRUE(exception_class != NULL);
+  ASSERT_NE(exception_class, nullptr);
   jthrowable exception = reinterpret_cast<jthrowable>(env_->AllocObject(exception_class));
-  ASSERT_TRUE(exception != NULL);
+  ASSERT_NE(exception, nullptr);
 
   EXPECT_EQ(JNI_OK, env_->Throw(exception));
   EXPECT_TRUE(env_->ExceptionCheck());
@@ -1109,10 +1451,10 @@
 }
 
 TEST_F(JniInternalTest, ThrowNew) {
-  EXPECT_EQ(JNI_ERR, env_->Throw(NULL));
+  EXPECT_EQ(JNI_ERR, env_->Throw(nullptr));
 
   jclass exception_class = env_->FindClass("java/lang/RuntimeException");
-  ASSERT_TRUE(exception_class != NULL);
+  ASSERT_NE(exception_class, nullptr);
 
   jthrowable thrown_exception;
 
@@ -1122,7 +1464,7 @@
   env_->ExceptionClear();
   EXPECT_TRUE(env_->IsInstanceOf(thrown_exception, exception_class));
 
-  EXPECT_EQ(JNI_OK, env_->ThrowNew(exception_class, NULL));
+  EXPECT_EQ(JNI_OK, env_->ThrowNew(exception_class, nullptr));
   EXPECT_TRUE(env_->ExceptionCheck());
   thrown_exception = env_->ExceptionOccurred();
   env_->ExceptionClear();
@@ -1148,26 +1490,26 @@
   ASSERT_TRUE(started);
 
   jclass buffer_class = env_->FindClass("java/nio/Buffer");
-  ASSERT_TRUE(buffer_class != NULL);
+  ASSERT_NE(buffer_class, nullptr);
 
   char bytes[1024];
   jobject buffer = env_->NewDirectByteBuffer(bytes, sizeof(bytes));
-  ASSERT_TRUE(buffer != NULL);
+  ASSERT_NE(buffer, nullptr);
   ASSERT_TRUE(env_->IsInstanceOf(buffer, buffer_class));
-  ASSERT_TRUE(env_->GetDirectBufferAddress(buffer) == bytes);
-  ASSERT_TRUE(env_->GetDirectBufferCapacity(buffer) == sizeof(bytes));
+  ASSERT_EQ(env_->GetDirectBufferAddress(buffer), bytes);
+  ASSERT_EQ(env_->GetDirectBufferCapacity(buffer), static_cast<jlong>(sizeof(bytes)));
 }
 
 TEST_F(JniInternalTest, MonitorEnterExit) {
-  // Create an object to torture
+  // Create an object to torture.
   jclass object_class = env_->FindClass("java/lang/Object");
-  ASSERT_TRUE(object_class != NULL);
+  ASSERT_NE(object_class, nullptr);
   jobject object = env_->AllocObject(object_class);
-  ASSERT_TRUE(object != NULL);
+  ASSERT_NE(object, nullptr);
 
   // Expected class of exceptions
   jclass imse_class = env_->FindClass("java/lang/IllegalMonitorStateException");
-  ASSERT_TRUE(imse_class != NULL);
+  ASSERT_NE(imse_class, nullptr);
 
   jthrowable thrown_exception;
 
@@ -1204,13 +1546,13 @@
   env_->ExceptionClear();
   EXPECT_TRUE(env_->IsInstanceOf(thrown_exception, imse_class));
 
-  // It's an error to call MonitorEnter or MonitorExit on NULL.
+  // It's an error to call MonitorEnter or MonitorExit on nullptr.
   {
     CheckJniAbortCatcher check_jni_abort_catcher;
-    env_->MonitorEnter(NULL);
+    env_->MonitorEnter(nullptr);
     check_jni_abort_catcher.Check("in call to MonitorEnter");
 
-    env_->MonitorExit(NULL);
+    env_->MonitorExit(nullptr);
     check_jni_abort_catcher.Check("in call to MonitorExit");
   }
 }
@@ -1222,7 +1564,7 @@
 
   jint err = vm_->DetachCurrentThread();
   EXPECT_EQ(JNI_ERR, err);
-  vm_->AttachCurrentThread(&env_, NULL);  // need attached thread for CommonRuntimeTest::TearDown
+  vm_->AttachCurrentThread(&env_, nullptr);  // Need attached thread for CommonRuntimeTest::TearDown.
 }
 
 }  // namespace art
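A note on the pattern above: the test now prefers the two-argument gtest macros (ASSERT_NE/EXPECT_EQ) over ASSERT_TRUE with an inline comparison, and routes every null-argument case through CheckJniAbortCatcher. A minimal sketch of the diagnostic difference, using stock gtest only; the Widget type and MakeWidget factory are made up for illustration:

    #include <gtest/gtest.h>

    // Hypothetical type and factory, used only for this illustration.
    struct Widget {};
    Widget* MakeWidget() { static Widget w; return &w; }

    TEST(MacroStyle, TwoArgumentFormPrintsBothValues) {
      Widget* w = MakeWidget();
      // If this ever failed, gtest would only report "Actual: false".
      EXPECT_TRUE(w != nullptr);
      // If this failed, gtest would print both operands, which is why the
      // conversions above standardize on the two-argument macros.
      EXPECT_NE(w, nullptr);
    }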
diff --git a/runtime/leb128_test.cc b/runtime/leb128_test.cc
index d75d5c2..5d157dc 100644
--- a/runtime/leb128_test.cc
+++ b/runtime/leb128_test.cc
@@ -253,8 +253,8 @@
 }
 
 TEST(Leb128Test, Speed) {
-  UniquePtr<Histogram<uint64_t> > enc_hist(new Histogram<uint64_t>("Leb128EncodeSpeedTest", 5));
-  UniquePtr<Histogram<uint64_t> > dec_hist(new Histogram<uint64_t>("Leb128DecodeSpeedTest", 5));
+  std::unique_ptr<Histogram<uint64_t>> enc_hist(new Histogram<uint64_t>("Leb128EncodeSpeedTest", 5));
+  std::unique_ptr<Histogram<uint64_t>> dec_hist(new Histogram<uint64_t>("Leb128DecodeSpeedTest", 5));
   Leb128EncodingVector builder;
   // Push back 1024 chunks of 1024 values measuring encoding speed.
   uint64_t last_time = NanoTime();
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 1594338..49e0b54 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -18,8 +18,13 @@
 
 #include <inttypes.h>
 #include <backtrace/BacktraceMap.h>
+#include <memory>
 
-#include "UniquePtr.h"
+// See CreateStartPos below.
+#ifdef __BIONIC__
+#include <sys/auxv.h>
+#endif
+
 #include "base/stringprintf.h"
 #include "ScopedFd.h"
 #include "utils.h"
@@ -47,10 +52,61 @@
 }
 
 #if defined(__LP64__) && !defined(__x86_64__)
-// Where to start with low memory allocation.
-static constexpr uintptr_t LOW_MEM_START = kPageSize * 2;
+// Handling mem_map requests in the 32-bit address range on 64-bit architectures without MAP_32BIT.
 
-uintptr_t MemMap::next_mem_pos_ = LOW_MEM_START;   // first page to check for low-mem extent
+// The regular start of memory allocations. The first 64KB is protected by SELinux.
+static constexpr uintptr_t LOW_MEM_START = 64 * KB;
+
+// Generate random starting position.
+// To not interfere with image position, take the image's address and only place it below. Current
+// formula (sketch):
+//
+// ART_BASE_ADDR      = 0001XXXXXXXXXXXXXXX
+// ----------------------------------------
+//                    = 0000111111111111111
+// & ~(kPageSize - 1) =~0000000000000001111
+// ----------------------------------------
+// mask               = 0000111111111110000
+// & random data      = YYYYYYYYYYYYYYYYYYY
+// -----------------------------------
+// tmp                = 0000YYYYYYYYYYY0000
+// + LOW_MEM_START    = 0000000000001000000
+// --------------------------------------
+// start
+//
+// getauxval as an entropy source is exposed in Bionic, but not in glibc before 2.16. When we
+// do not have Bionic, simply start with LOW_MEM_START.
+
+// Function is standalone so it can be tested somewhat in mem_map_test.cc.
+#ifdef __BIONIC__
+uintptr_t CreateStartPos(uint64_t input) {
+  CHECK_NE(0, ART_BASE_ADDRESS);
+
+  // Start with all bits below highest bit in ART_BASE_ADDRESS.
+  constexpr size_t leading_zeros = CLZ(static_cast<uint32_t>(ART_BASE_ADDRESS));
+  constexpr uintptr_t mask_ones = (1 << (31 - leading_zeros)) - 1;
+
+  // Lowest (usually 12) bits are not used, as aligned by page size.
+  constexpr uintptr_t mask = mask_ones & ~(kPageSize - 1);
+
+  // Mask input data.
+  return (input & mask) + LOW_MEM_START;
+}
+#endif
+
+static uintptr_t GenerateNextMemPos() {
+#ifdef __BIONIC__
+  uint8_t* random_data = reinterpret_cast<uint8_t*>(getauxval(AT_RANDOM));
+  // The lower 8B are taken for the stack guard. Use the upper 8B (with mask).
+  return CreateStartPos(*reinterpret_cast<uintptr_t*>(random_data + 8));
+#else
+  // No auxv on host, see above.
+  return LOW_MEM_START;
+#endif
+}
+
+// Initialize the linear scan to a random position.
+uintptr_t MemMap::next_mem_pos_ = GenerateNextMemPos();
 #endif
 
 static bool CheckMapRequest(byte* expected_ptr, void* actual_ptr, size_t byte_count,
@@ -76,7 +132,7 @@
   uintptr_t expected = reinterpret_cast<uintptr_t>(expected_ptr);
   uintptr_t limit = expected + byte_count;
 
-  UniquePtr<BacktraceMap> map(BacktraceMap::Create(getpid()));
+  std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid()));
   if (!map->Build()) {
     *error_msg << StringPrintf("Failed to build process map to determine why mmap returned "
                                "0x%08" PRIxPTR " instead of 0x%08" PRIxPTR, actual, expected);
@@ -128,6 +184,20 @@
   // We need to store and potentially set an error number for pretty printing of errors
   int saved_errno = 0;
 
+#ifdef __LP64__
+  // When requesting low_4g memory and having an expectation, the requested range should fit into
+  // 4GB.
+  if (low_4gb && (
+      // Start out of bounds.
+      (reinterpret_cast<uintptr_t>(expected) >> 32) != 0 ||
+      // End out of bounds. For simplicity, this will fail for the last page of memory.
+      (reinterpret_cast<uintptr_t>(expected + page_aligned_byte_count) >> 32) != 0)) {
+    *error_msg = StringPrintf("The requested address space (%p, %p) cannot fit in low_4gb",
+                              expected, expected + page_aligned_byte_count);
+    return nullptr;
+  }
+#endif
+
   // TODO:
   // A page allocator would be a useful abstraction here, as
   // 1) It is doubtful that MAP_32BIT on x86_64 is doing the right job for us
@@ -192,7 +262,7 @@
 
 #else
 #ifdef __x86_64__
-  if (low_4gb) {
+  if (low_4gb && expected == nullptr) {
     flags |= MAP_32BIT;
   }
 #endif
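The bit diagram in the CreateStartPos comment is easier to verify with concrete numbers. The sketch below replays the same masking with a hypothetical ART_BASE_ADDRESS of 0x70000000 and a 4 KiB page; every Example* name and constant is an illustrative stand-in, not a value ART is built with:

    #include <cstdint>
    #include <cstdio>

    // Illustrative stand-ins; the real values come from the build (ART_BASE_ADDRESS, kPageSize).
    constexpr uint32_t  kExampleBaseAddress = 0x70000000;
    constexpr uintptr_t kExamplePageSize    = 4096;
    constexpr uintptr_t kExampleLowMemStart = 64 * 1024;  // mirrors LOW_MEM_START above

    uintptr_t ExampleCreateStartPos(uint64_t entropy) {
      // Keep every bit below the highest set bit of the base address...
      const int leading_zeros = __builtin_clz(kExampleBaseAddress);            // 1 for 0x70000000
      const uintptr_t mask_ones = (uintptr_t{1} << (31 - leading_zeros)) - 1;  // 0x3fffffff
      // ...then clear the page-offset bits so the result stays page-aligned.
      const uintptr_t mask = mask_ones & ~(kExamplePageSize - 1);              // 0x3ffff000
      return (static_cast<uintptr_t>(entropy) & mask) + kExampleLowMemStart;
    }

    int main() {
      // Always page-aligned, at least 64 KiB, and below the example base address.
      const uintptr_t start = ExampleCreateStartPos(0xdeadbeefcafef00dULL);
      std::printf("%#llx\n", static_cast<unsigned long long>(start));  // prints 0xafff000
      return 0;
    }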
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index 4255d17..1411856 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -28,6 +28,12 @@
 namespace art {
 
 // Used to keep track of mmap segments.
+//
+// On 64b systems not supporting MAP_32BIT, the implementation of MemMap will do a linear scan
+// for free pages. For security, the start of this scan should be randomized. This requires a
+// dynamic initializer.
+// For this to work, it is paramount that there are no other static initializers that access MemMap.
+// Otherwise, calls might see uninitialized values.
 class MemMap {
  public:
   // Request an anonymous region of length 'byte_count' and a requested base address.
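The warning above about static initializers is the usual cross-translation-unit initialization-order problem. A standalone illustration, with made-up names, of how a static constructor in another file could observe the scan position before its dynamic initializer has run:

    #include <cstdio>

    // Made-up names; this only illustrates the cross-TU initialization-order hazard.
    struct ScanState {
      static unsigned long next_pos;  // dynamically initialized, like MemMap::next_mem_pos_
    };
    unsigned long ComputeStart() { return 0x10000; }
    unsigned long ScanState::next_pos = ComputeStart();

    // If another translation unit contained, say,
    //   struct EarlyUser { EarlyUser() { std::printf("%lx\n", ScanState::next_pos); } };
    //   EarlyUser early_user;
    // its constructor could run before next_pos is initialized and print 0 instead of 10000,
    // depending on link order. That is exactly the situation the comment above rules out.
    int main() {
      std::printf("%lx\n", ScanState::next_pos);  // by the time main runs, initialization is done
      return 0;
    }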
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index eea3307..c108a5f 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -16,7 +16,8 @@
 
 #include "mem_map.h"
 
-#include "UniquePtr.h"
+#include <memory>
+
 #include "gtest/gtest.h"
 
 namespace art {
@@ -83,11 +84,44 @@
     }
     delete m1;
   }
+
+#if defined(__LP64__) && !defined(__x86_64__)
+  static uintptr_t GetLinearScanPos() {
+    return MemMap::next_mem_pos_;
+  }
+#endif
 };
 
+#if defined(__LP64__) && !defined(__x86_64__)
+
+#ifdef __BIONIC__
+extern uintptr_t CreateStartPos(uint64_t input);
+#endif
+
+TEST_F(MemMapTest, Start) {
+  uintptr_t start = GetLinearScanPos();
+  EXPECT_LE(64 * KB, start);
+  EXPECT_LT(start, static_cast<uintptr_t>(ART_BASE_ADDRESS));
+
+#ifdef __BIONIC__
+  // Test a couple of values. Make sure they are different.
+  uintptr_t last = 0;
+  for (size_t i = 0; i < 100; ++i) {
+    uintptr_t random_start = CreateStartPos(i * kPageSize);
+    EXPECT_NE(last, random_start);
+    last = random_start;
+  }
+
+  // Even on max, should be below ART_BASE_ADDRESS.
+  EXPECT_LT(CreateStartPos(~0), static_cast<uintptr_t>(ART_BASE_ADDRESS));
+#endif
+  // End of test.
+}
+#endif
+
 TEST_F(MemMapTest, MapAnonymousEmpty) {
   std::string error_msg;
-  UniquePtr<MemMap> map(MemMap::MapAnonymous("MapAnonymousEmpty",
+  std::unique_ptr<MemMap> map(MemMap::MapAnonymous("MapAnonymousEmpty",
                                              nullptr,
                                              0,
                                              PROT_READ,
@@ -108,7 +142,7 @@
 #ifdef __LP64__
 TEST_F(MemMapTest, MapAnonymousEmpty32bit) {
   std::string error_msg;
-  UniquePtr<MemMap> map(MemMap::MapAnonymous("MapAnonymousEmpty",
+  std::unique_ptr<MemMap> map(MemMap::MapAnonymous("MapAnonymousEmpty",
                                              nullptr,
                                              kPageSize,
                                              PROT_READ | PROT_WRITE,
@@ -123,7 +157,7 @@
 TEST_F(MemMapTest, MapAnonymousExactAddr) {
   std::string error_msg;
   // Map at an address that should work, which should succeed.
-  UniquePtr<MemMap> map0(MemMap::MapAnonymous("MapAnonymous0",
+  std::unique_ptr<MemMap> map0(MemMap::MapAnonymous("MapAnonymous0",
                                               reinterpret_cast<byte*>(ART_BASE_ADDRESS),
                                               kPageSize,
                                               PROT_READ | PROT_WRITE,
@@ -133,7 +167,7 @@
   ASSERT_TRUE(error_msg.empty());
   ASSERT_TRUE(map0->BaseBegin() == reinterpret_cast<void*>(ART_BASE_ADDRESS));
   // Map at an unspecified address, which should succeed.
-  UniquePtr<MemMap> map1(MemMap::MapAnonymous("MapAnonymous1",
+  std::unique_ptr<MemMap> map1(MemMap::MapAnonymous("MapAnonymous1",
                                               nullptr,
                                               kPageSize,
                                               PROT_READ | PROT_WRITE,
@@ -143,7 +177,7 @@
   ASSERT_TRUE(error_msg.empty());
   ASSERT_TRUE(map1->BaseBegin() != nullptr);
   // Attempt to map at the same address, which should fail.
-  UniquePtr<MemMap> map2(MemMap::MapAnonymous("MapAnonymous2",
+  std::unique_ptr<MemMap> map2(MemMap::MapAnonymous("MapAnonymous2",
                                               reinterpret_cast<byte*>(map1->BaseBegin()),
                                               kPageSize,
                                               PROT_READ | PROT_WRITE,
@@ -163,4 +197,57 @@
 }
 #endif
 
+TEST_F(MemMapTest, MapAnonymousExactAddr32bitHighAddr) {
+  std::string error_msg;
+  std::unique_ptr<MemMap> map(MemMap::MapAnonymous("MapAnonymousExactAddr32bitHighAddr",
+                                             reinterpret_cast<byte*>(0x71000000),
+                                             0x21000000,
+                                             PROT_READ | PROT_WRITE,
+                                             true,
+                                             &error_msg));
+  ASSERT_TRUE(map.get() != nullptr) << error_msg;
+  ASSERT_TRUE(error_msg.empty());
+  ASSERT_EQ(reinterpret_cast<uintptr_t>(BaseBegin(map.get())), 0x71000000U);
+}
+
+TEST_F(MemMapTest, MapAnonymousOverflow) {
+  std::string error_msg;
+  uintptr_t ptr = 0;
+  ptr -= kPageSize;  // Now it's close to the top.
+  std::unique_ptr<MemMap> map(MemMap::MapAnonymous("MapAnonymousOverflow",
+                                             reinterpret_cast<byte*>(ptr),
+                                             2 * kPageSize,  // brings it over the top.
+                                             PROT_READ | PROT_WRITE,
+                                             false,
+                                             &error_msg));
+  ASSERT_EQ(nullptr, map.get());
+  ASSERT_FALSE(error_msg.empty());
+}
+
+#ifdef __LP64__
+TEST_F(MemMapTest, MapAnonymousLow4GBExpectedTooHigh) {
+  std::string error_msg;
+  std::unique_ptr<MemMap> map(MemMap::MapAnonymous("MapAnonymousLow4GBExpectedTooHigh",
+                                             reinterpret_cast<byte*>(UINT64_C(0x100000000)),
+                                             kPageSize,
+                                             PROT_READ | PROT_WRITE,
+                                             true,
+                                             &error_msg));
+  ASSERT_EQ(nullptr, map.get());
+  ASSERT_FALSE(error_msg.empty());
+}
+
+TEST_F(MemMapTest, MapAnonymousLow4GBRangeTooHigh) {
+  std::string error_msg;
+  std::unique_ptr<MemMap> map(MemMap::MapAnonymous("MapAnonymousLow4GBRangeTooHigh",
+                                             reinterpret_cast<byte*>(0xF0000000),
+                                             0x20000000,
+                                             PROT_READ | PROT_WRITE,
+                                             true,
+                                             &error_msg));
+  ASSERT_EQ(nullptr, map.get());
+  ASSERT_FALSE(error_msg.empty());
+}
+#endif
+
 }  // namespace art
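The new negative tests exercise two pieces of address arithmetic: a request that wraps past the top of the address space, and low_4gb requests whose start or end does not fit below 4 GiB. A standalone restatement of those checks, assuming a 4 KiB page size and using made-up helper names:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Made-up helpers restating the two checks; kPageSize is assumed to be 4 KiB here.
    constexpr uintptr_t kExamplePageSize = 4096;

    bool RangeWrapsAddressSpace(uintptr_t start, size_t byte_count) {
      // Overflow iff start + byte_count wraps around zero (MapAnonymousOverflow).
      return byte_count != 0 && start + byte_count < start;
    }

    bool RangeFitsInLow4GB(uint64_t start, uint64_t byte_count) {
      // Mirrors the (addr >> 32) != 0 test added to MemMap::MapAnonymous for low_4gb requests.
      return (start >> 32) == 0 && ((start + byte_count) >> 32) == 0;
    }

    int main() {
      const uintptr_t last_page = ~uintptr_t{0} - kExamplePageSize + 1;
      assert(RangeWrapsAddressSpace(last_page, 2 * kExamplePageSize));  // MapAnonymousOverflow
      assert(!RangeFitsInLow4GB(0x100000000ULL, kExamplePageSize));     // ...ExpectedTooHigh
      assert(!RangeFitsInLow4GB(0xF0000000, 0x20000000));               // ...RangeTooHigh
      assert(RangeFitsInLow4GB(0x71000000, 0x21000000));                // the positive case
      return 0;
    }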
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index bc8d348..65799cd 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -27,11 +27,11 @@
 namespace art {
 namespace mirror {
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline size_t Array::SizeOf() {
   // This is safe from overflow because the array was already allocated, so we know it's sane.
   size_t component_size =
-      GetClass<kVerifyFlags, kDoReadBarrier>()->template GetComponentSize<kDoReadBarrier>();
+      GetClass<kVerifyFlags, kReadBarrierOption>()->template GetComponentSize<kReadBarrierOption>();
   // Don't need to check this since we already check this in GetClass.
   int32_t component_count =
       GetLength<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis)>();
diff --git a/runtime/mirror/array.cc b/runtime/mirror/array.cc
index 139e2d0..1076643 100644
--- a/runtime/mirror/array.cc
+++ b/runtime/mirror/array.cc
@@ -26,7 +26,7 @@
 #include "object_array.h"
 #include "object_array-inl.h"
 #include "object_utils.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 #include "thread.h"
 #include "utils.h"
 
@@ -42,22 +42,25 @@
 // Recursively create an array with multiple dimensions.  Elements may be
 // Objects or primitive types.
 static Array* RecursiveCreateMultiArray(Thread* self,
-                                        const SirtRef<Class>& array_class, int current_dimension,
-                                        const SirtRef<mirror::IntArray>& dimensions)
+                                        Handle<Class> array_class, int current_dimension,
+                                        Handle<mirror::IntArray> dimensions)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   int32_t array_length = dimensions->Get(current_dimension);
-  SirtRef<Array> new_array(self, Array::Alloc<true>(self, array_class.get(), array_length,
-                                                    array_class->GetComponentSize(),
-                                                    Runtime::Current()->GetHeap()->GetCurrentAllocator()));
-  if (UNLIKELY(new_array.get() == nullptr)) {
+  StackHandleScope<1> hs(self);
+  Handle<Array> new_array(
+      hs.NewHandle(
+          Array::Alloc<true>(self, array_class.Get(), array_length, array_class->GetComponentSize(),
+                             Runtime::Current()->GetHeap()->GetCurrentAllocator())));
+  if (UNLIKELY(new_array.Get() == nullptr)) {
     CHECK(self->IsExceptionPending());
     return nullptr;
   }
   if (current_dimension + 1 < dimensions->GetLength()) {
     // Create a new sub-array in every element of the array.
     for (int32_t i = 0; i < array_length; i++) {
-      SirtRef<mirror::Class> sirt_component_type(self, array_class->GetComponentType());
-      Array* sub_array = RecursiveCreateMultiArray(self, sirt_component_type,
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> h_component_type(hs.NewHandle(array_class->GetComponentType()));
+      Array* sub_array = RecursiveCreateMultiArray(self, h_component_type,
                                                    current_dimension + 1, dimensions);
       if (UNLIKELY(sub_array == nullptr)) {
         CHECK(self->IsExceptionPending());
@@ -67,11 +70,11 @@
       new_array->AsObjectArray<Array>()->Set<false, false>(i, sub_array);
     }
   }
-  return new_array.get();
+  return new_array.Get();
 }
 
-Array* Array::CreateMultiArray(Thread* self, const SirtRef<Class>& element_class,
-                               const SirtRef<IntArray>& dimensions) {
+Array* Array::CreateMultiArray(Thread* self, Handle<Class> element_class,
+                               Handle<IntArray> dimensions) {
   // Verify dimensions.
   //
   // The caller is responsible for verifying that "dimArray" is non-null
@@ -90,15 +93,16 @@
 
   // Find/generate the array class.
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  SirtRef<mirror::Class> array_class(self,
-                                     class_linker->FindArrayClass(self, element_class.get()));
-  if (UNLIKELY(array_class.get() == nullptr)) {
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Class> array_class(
+      hs.NewHandle(class_linker->FindArrayClass(self, element_class.Get())));
+  if (UNLIKELY(array_class.Get() == nullptr)) {
     CHECK(self->IsExceptionPending());
     return nullptr;
   }
   for (int32_t i = 1; i < dimensions->GetLength(); ++i) {
-    array_class.reset(class_linker->FindArrayClass(self, array_class.get()));
-    if (UNLIKELY(array_class.get() == nullptr)) {
+    array_class.Assign(class_linker->FindArrayClass(self, array_class.Get()));
+    if (UNLIKELY(array_class.Get() == nullptr)) {
       CHECK(self->IsExceptionPending());
       return nullptr;
     }
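The array.cc hunks are part of the SirtRef to Handle/StackHandleScope migration. The toy classes below only mimic the call shapes used above (NewHandle, Get, Assign); the real classes live in handle_scope-inl.h and additionally publish the stored pointers to the garbage collector, which this sketch does not attempt:

    #include <cassert>
    #include <cstddef>

    // Toy stand-ins for the real StackHandleScope/Handle in handle_scope-inl.h.
    template <typename T>
    class Handle {
     public:
      explicit Handle(void** slot) : slot_(slot) {}
      T* Get() const { return static_cast<T*>(*slot_); }  // replaces SirtRef<T>::get()
      void Assign(T* value) { *slot_ = value; }            // replaces SirtRef<T>::reset()
     private:
      void** slot_;
    };

    template <size_t kNumReferences>
    class StackHandleScope {
     public:
      template <typename T>
      Handle<T> NewHandle(T* value) {
        slots_[used_] = value;
        return Handle<T>(&slots_[used_++]);
      }
     private:
      void* slots_[kNumReferences] = {};
      size_t used_ = 0;
    };

    struct Class {};  // placeholder for mirror::Class

    int main() {
      Class a, b;
      StackHandleScope<1> hs;  // the real constructor takes the current Thread*
      Handle<Class> h(hs.NewHandle(&a));
      assert(h.Get() == &a);
      h.Assign(&b);            // like array_class.Assign(...) above
      assert(h.Get() == &b);
      return 0;
    }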
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 92f0e67..64e2317 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -23,7 +23,7 @@
 
 namespace art {
 
-template<class T> class SirtRef;
+template<class T> class Handle;
 
 namespace mirror {
 
@@ -38,11 +38,12 @@
                       bool fill_usable = false)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static Array* CreateMultiArray(Thread* self, const SirtRef<Class>& element_class,
-                                 const SirtRef<IntArray>& dimensions)
+  static Array* CreateMultiArray(Thread* self, Handle<Class> element_class,
+                                 Handle<IntArray> dimensions)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   size_t SizeOf() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   int32_t GetLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -154,14 +155,19 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void SetArrayClass(Class* array_class) {
-    CHECK(array_class_ == NULL);
-    CHECK(array_class != NULL);
+    CHECK(array_class_ == nullptr);
+    CHECK(array_class != nullptr);
     array_class_ = array_class;
   }
 
+  static Class* GetArrayClass() {
+    DCHECK(array_class_ != nullptr);
+    return array_class_;
+  }
+
   static void ResetArrayClass() {
-    CHECK(array_class_ != NULL);
-    array_class_ = NULL;
+    CHECK(array_class_ != nullptr);
+    array_class_ = nullptr;
   }
 
   static void VisitRoots(RootCallback* callback, void* arg)
diff --git a/runtime/mirror/art_field.cc b/runtime/mirror/art_field.cc
index 8eb30f9..b3b1b71 100644
--- a/runtime/mirror/art_field.cc
+++ b/runtime/mirror/art_field.cc
@@ -29,10 +29,11 @@
 namespace art {
 namespace mirror {
 
-// TODO: get global references for these
+// TODO: Get global references for these
 Class* ArtField::java_lang_reflect_ArtField_ = NULL;
 
-ArtField* ArtField::FromReflectedField(const ScopedObjectAccess& soa, jobject jlr_field) {
+ArtField* ArtField::FromReflectedField(const ScopedObjectAccessAlreadyRunnable& soa,
+                                       jobject jlr_field) {
   mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_reflect_Field_artField);
   mirror::ArtField* field = f->GetObject(soa.Decode<mirror::Object*>(jlr_field))->AsArtField();
   DCHECK(field != nullptr);
diff --git a/runtime/mirror/art_field.h b/runtime/mirror/art_field.h
index 029bd5a..30cd180 100644
--- a/runtime/mirror/art_field.h
+++ b/runtime/mirror/art_field.h
@@ -27,14 +27,15 @@
 namespace art {
 
 struct ArtFieldOffsets;
-class ScopedObjectAccess;
+class ScopedObjectAccessAlreadyRunnable;
 
 namespace mirror {
 
 // C++ mirror of java.lang.reflect.ArtField
 class MANAGED ArtField : public Object {
  public:
-  static ArtField* FromReflectedField(const ScopedObjectAccess& soa, jobject jlr_field)
+  static ArtField* FromReflectedField(const ScopedObjectAccessAlreadyRunnable& soa,
+                                      jobject jlr_field)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   Class* GetDeclaringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -121,7 +122,7 @@
   void SetObj(Object* object, Object* new_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Class* GetJavaLangReflectArtField() {
-    DCHECK(java_lang_reflect_ArtField_ != NULL);
+    DCHECK(java_lang_reflect_ArtField_ != nullptr);
     return java_lang_reflect_ArtField_;
   }
 
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index fb9a09a..39efa58 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -23,7 +23,8 @@
 #include "entrypoints/entrypoint_utils.h"
 #include "object_array.h"
 #include "oat.h"
-#include "runtime.h"
+#include "quick/quick_method_frame_info.h"
+#include "runtime-inl.h"
 
 namespace art {
 namespace mirror {
@@ -61,17 +62,17 @@
 }
 
 inline ObjectArray<String>* ArtMethod::GetDexCacheStrings() {
-  return GetFieldObject<ObjectArray<String> >(
+  return GetFieldObject<ObjectArray<String>>(
       OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_strings_));
 }
 
 inline ObjectArray<ArtMethod>* ArtMethod::GetDexCacheResolvedMethods() {
-  return GetFieldObject<ObjectArray<ArtMethod> >(
+  return GetFieldObject<ObjectArray<ArtMethod>>(
       OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_methods_));
 }
 
 inline ObjectArray<Class>* ArtMethod::GetDexCacheResolvedTypes() {
-  return GetFieldObject<ObjectArray<Class> >(
+  return GetFieldObject<ObjectArray<Class>>(
       OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_types_));
 }
 
@@ -81,7 +82,7 @@
   if (code == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const OatMethodHeader*>(code)[-1].code_size_;
+  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
 }
 
 inline bool ArtMethod::CheckIncompatibleClassChange(InvokeType type) {
@@ -152,6 +153,65 @@
   SetEntryPointFromPortableCompiledCode(reinterpret_cast<void*>(code_offset));
 }
 
+inline const void* ArtMethod::GetQuickOatEntryPoint() {
+  if (IsPortableCompiled() || IsAbstract() || IsRuntimeMethod() || IsProxyMethod()) {
+    return nullptr;
+  }
+  Runtime* runtime = Runtime::Current();
+  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(this);
+  // On failure, instead of nullptr we get the quick-generic-jni-trampoline for native methods
+  // (indicating generic JNI), or the quick-to-interpreter-bridge (but not the trampoline)
+  // for non-native methods.
+  DCHECK(entry_point != GetQuickToInterpreterBridgeTrampoline(runtime->GetClassLinker()));
+  if (UNLIKELY(entry_point == GetQuickToInterpreterBridge()) ||
+      UNLIKELY(entry_point == runtime->GetClassLinker()->GetQuickGenericJniTrampoline())) {
+    return nullptr;
+  }
+  return entry_point;
+}
+
+inline const void* ArtMethod::GetQuickOatCodePointer() {
+  return EntryPointToCodePointer(GetQuickOatEntryPoint());
+}
+
+inline const uint8_t* ArtMethod::GetMappingTable() {
+  const void* code_pointer = GetQuickOatCodePointer();
+  if (code_pointer == nullptr) {
+    return nullptr;
+  }
+  return GetMappingTable(code_pointer);
+}
+
+inline const uint8_t* ArtMethod::GetMappingTable(const void* code_pointer) {
+  DCHECK(code_pointer != nullptr);
+  DCHECK(code_pointer == GetQuickOatCodePointer());
+  uint32_t offset =
+      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].mapping_table_offset_;
+  if (UNLIKELY(offset == 0u)) {
+    return nullptr;
+  }
+  return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
+}
+
+inline const uint8_t* ArtMethod::GetVmapTable() {
+  const void* code_pointer = GetQuickOatCodePointer();
+  if (code_pointer == nullptr) {
+    return nullptr;
+  }
+  return GetVmapTable(code_pointer);
+}
+
+inline const uint8_t* ArtMethod::GetVmapTable(const void* code_pointer) {
+  DCHECK(code_pointer != nullptr);
+  DCHECK(code_pointer == GetQuickOatCodePointer());
+  uint32_t offset =
+      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].vmap_table_offset_;
+  if (UNLIKELY(offset == 0u)) {
+    return nullptr;
+  }
+  return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
+}
+
 inline void ArtMethod::SetOatNativeGcMapOffset(uint32_t gc_map_offset) {
   DCHECK(!Runtime::Current()->IsStarted());
   SetNativeGcMap(reinterpret_cast<uint8_t*>(gc_map_offset));
@@ -195,12 +255,63 @@
   return result;
 }
 
+inline uintptr_t ArtMethod::NativePcOffset(const uintptr_t pc) {
+  const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
+  return pc - reinterpret_cast<uintptr_t>(code);
+}
+
+inline uintptr_t ArtMethod::NativePcOffset(const uintptr_t pc, const void* quick_entry_point) {
+  DCHECK(quick_entry_point != GetQuickToInterpreterBridge());
+  DCHECK(quick_entry_point == Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this));
+  return pc - reinterpret_cast<uintptr_t>(quick_entry_point);
+}
+
 template<VerifyObjectFlags kVerifyFlags>
 inline void ArtMethod::SetNativeMethod(const void* native_method) {
   SetFieldPtr<false, true, kVerifyFlags>(
       OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_jni_), native_method);
 }
 
+inline QuickMethodFrameInfo ArtMethod::GetQuickFrameInfo() {
+  if (UNLIKELY(IsPortableCompiled())) {
+    // Portable compiled dex bytecode or jni stub.
+    return QuickMethodFrameInfo(kStackAlignment, 0u, 0u);
+  }
+  Runtime* runtime = Runtime::Current();
+  if (UNLIKELY(IsAbstract()) || UNLIKELY(IsProxyMethod())) {
+    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+  }
+  if (UNLIKELY(IsRuntimeMethod())) {
+    return runtime->GetRuntimeMethodFrameInfo(this);
+  }
+
+  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(this);
+  // On failure, instead of nullptr we get the quick-generic-jni-trampoline for native methods
+  // (indicating generic JNI), or the quick-to-interpreter-bridge (but not the trampoline)
+  // for non-native methods. And we really shouldn't see a failure for non-native methods here.
+  DCHECK(entry_point != GetQuickToInterpreterBridgeTrampoline(runtime->GetClassLinker()));
+  CHECK(entry_point != GetQuickToInterpreterBridge());
+
+  if (UNLIKELY(entry_point == runtime->GetClassLinker()->GetQuickGenericJniTrampoline())) {
+    // Generic JNI frame.
+    DCHECK(IsNative());
+    uint32_t handle_refs = MethodHelper(this).GetNumberOfReferenceArgsWithoutReceiver() + 1;
+    size_t scope_size = HandleScope::GetAlignedHandleScopeSize(handle_refs);
+    QuickMethodFrameInfo callee_info = runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+    return QuickMethodFrameInfo(callee_info.FrameSizeInBytes() + scope_size,
+                                callee_info.CoreSpillMask(), callee_info.FpSpillMask());
+  }
+
+  const void* code_pointer = EntryPointToCodePointer(entry_point);
+  return GetQuickFrameInfo(code_pointer);
+}
+
+inline QuickMethodFrameInfo ArtMethod::GetQuickFrameInfo(const void* code_pointer) {
+  DCHECK(code_pointer != nullptr);
+  DCHECK(code_pointer == GetQuickOatCodePointer());
+  return reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].frame_info_;
+}
+
 }  // namespace mirror
 }  // namespace art
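GetCodeSize(), GetMappingTable(), GetVmapTable() and GetQuickFrameInfo() all rely on the same layout trick: the OatQuickMethodHeader sits immediately before the method's machine code, so indexing the code pointer at [-1] recovers it. A self-contained illustration of that layout; the struct, field values and buffer here are made up, only the [-1] access pattern matches the real code:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <vector>

    // Made-up header; the real OatQuickMethodHeader has more fields.
    struct ExampleMethodHeader {
      uint32_t mapping_table_offset_;
      uint32_t vmap_table_offset_;
      uint32_t code_size_;
    };

    int main() {
      // Lay out [header][code...] in one buffer, the way the oat file does.
      std::vector<uint8_t> buffer(sizeof(ExampleMethodHeader) + 16);
      const ExampleMethodHeader header = {64, 32, 16};
      std::memcpy(buffer.data(), &header, sizeof(header));
      const uint8_t* code_pointer = buffer.data() + sizeof(header);

      // The pattern used by GetCodeSize()/GetMappingTable()/GetVmapTable() above:
      // step one header backwards from the code pointer.
      const ExampleMethodHeader* found =
          &reinterpret_cast<const ExampleMethodHeader*>(code_pointer)[-1];
      std::printf("code_size=%u mapping_table_offset=%u\n",
                  static_cast<unsigned>(found->code_size_),
                  static_cast<unsigned>(found->mapping_table_offset_));
      // A zero offset means "no table"; otherwise the table sits at code_pointer - offset.
      return 0;
    }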
 
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 7453d4d..e2d3f41 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -48,7 +48,8 @@
 // TODO: get global references for these
 Class* ArtMethod::java_lang_reflect_ArtMethod_ = NULL;
 
-ArtMethod* ArtMethod::FromReflectedMethod(const ScopedObjectAccess& soa, jobject jlr_method) {
+ArtMethod* ArtMethod::FromReflectedMethod(const ScopedObjectAccessAlreadyRunnable& soa,
+                                          jobject jlr_method) {
   mirror::ArtField* f =
       soa.DecodeField(WellKnownClasses::java_lang_reflect_AbstractMethod_artMethod);
   mirror::ArtMethod* method = f->GetObject(soa.Decode<mirror::Object*>(jlr_method))->AsArtMethod();
@@ -164,23 +165,21 @@
   return result;
 }
 
-uintptr_t ArtMethod::NativePcOffset(const uintptr_t pc) {
-  const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
-  return pc - reinterpret_cast<uintptr_t>(code);
-}
-
 uint32_t ArtMethod::ToDexPc(const uintptr_t pc, bool abort_on_failure) {
   if (IsPortableCompiled()) {
     // Portable doesn't use the machine pc, we just use dex pc instead.
     return static_cast<uint32_t>(pc);
   }
-  MappingTable table(GetMappingTable());
+  const void* entry_point = GetQuickOatEntryPoint();
+  MappingTable table(
+      entry_point != nullptr ? GetMappingTable(EntryPointToCodePointer(entry_point)) : nullptr);
   if (table.TotalSize() == 0) {
+    // NOTE: Special methods (see Mir2Lir::GenSpecialCase()) have an empty mapping
+    // but they have no suspend checks and, consequently, we never call ToDexPc() for them.
     DCHECK(IsNative() || IsCalleeSaveMethod() || IsProxyMethod()) << PrettyMethod(this);
     return DexFile::kDexNoIndex;   // Special no mapping case
   }
-  const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
-  uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(code);
+  uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(entry_point);
   // Assume the caller wants a pc-to-dex mapping so check here first.
   typedef MappingTable::PcToDexIterator It;
   for (It cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
@@ -197,14 +196,16 @@
   }
   if (abort_on_failure) {
       LOG(FATAL) << "Failed to find Dex offset for PC offset " << reinterpret_cast<void*>(sought_offset)
-             << "(PC " << reinterpret_cast<void*>(pc) << ", code=" << code
+             << "(PC " << reinterpret_cast<void*>(pc) << ", entry_point=" << entry_point
              << ") in " << PrettyMethod(this);
   }
   return DexFile::kDexNoIndex;
 }
 
 uintptr_t ArtMethod::ToNativePc(const uint32_t dex_pc) {
-  MappingTable table(GetMappingTable());
+  const void* entry_point = GetQuickOatEntryPoint();
+  MappingTable table(
+      entry_point != nullptr ? GetMappingTable(EntryPointToCodePointer(entry_point)) : nullptr);
   if (table.TotalSize() == 0) {
     DCHECK_EQ(dex_pc, 0U);
     return 0;   // Special no mapping/pc == 0 case
@@ -213,16 +214,14 @@
   typedef MappingTable::DexToPcIterator It;
   for (It cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
     if (cur.DexPc() == dex_pc) {
-      const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
-      return reinterpret_cast<uintptr_t>(code) + cur.NativePcOffset();
+      return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
     }
   }
   // Now check pc-to-dex mappings.
   typedef MappingTable::PcToDexIterator It2;
   for (It2 cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
     if (cur.DexPc() == dex_pc) {
-      const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
-      return reinterpret_cast<uintptr_t>(code) + cur.NativePcOffset();
+      return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
     }
   }
   LOG(FATAL) << "Failed to find native offset for dex pc 0x" << std::hex << dex_pc
@@ -230,14 +229,15 @@
   return 0;
 }
 
-uint32_t ArtMethod::FindCatchBlock(SirtRef<Class>& exception_type, uint32_t dex_pc,
-                                   bool* has_no_move_exception) {
+uint32_t ArtMethod::FindCatchBlock(Handle<Class> exception_type, uint32_t dex_pc,
+                                   bool* has_no_move_exception, bool* exc_changed) {
   MethodHelper mh(this);
   const DexFile::CodeItem* code_item = mh.GetCodeItem();
   // Set aside the exception while we resolve its type.
   Thread* self = Thread::Current();
   ThrowLocation throw_location;
-  SirtRef<mirror::Throwable> exception(self, self->GetException(&throw_location));
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Throwable> exception(hs.NewHandle(self->GetException(&throw_location)));
   self->ClearException();
   // Default to handler not found.
   uint32_t found_dex_pc = DexFile::kDexNoIndex;
@@ -251,11 +251,18 @@
     }
     // Does this catch exception type apply?
     Class* iter_exception_type = mh.GetClassFromTypeIdx(iter_type_idx);
-    if (exception_type.get() == nullptr) {
-      self->ClearException();
+    if (iter_exception_type == nullptr) {
+      // Now have a NoClassDefFoundError as exception.
+      // Note: this is not RI behavior. RI would have failed when loading the class.
+      *exc_changed = true;
+
+      // TODO: Add old exception as suppressed.
       LOG(WARNING) << "Unresolved exception class when finding catch block: "
         << mh.GetTypeDescriptorFromTypeIdx(iter_type_idx);
-    } else if (iter_exception_type->IsAssignableFrom(exception_type.get())) {
+
+      // Return immediately.
+      return DexFile::kDexNoIndex;
+    } else if (iter_exception_type->IsAssignableFrom(exception_type.Get())) {
       found_dex_pc = it.GetHandlerAddress();
       break;
     }
@@ -266,8 +273,8 @@
     *has_no_move_exception = (first_catch_instr->Opcode() != Instruction::MOVE_EXCEPTION);
   }
   // Put the exception back.
-  if (exception.get() != nullptr) {
-    self->SetException(throw_location, exception.get());
+  if (exception.Get() != nullptr) {
+    self->SetException(throw_location, exception.Get());
   }
   return found_dex_pc;
 }
@@ -287,9 +294,11 @@
   Runtime* runtime = Runtime::Current();
   // Call the invoke stub, passing everything as arguments.
   if (UNLIKELY(!runtime->IsStarted())) {
-    LOG(INFO) << "Not invoking " << PrettyMethod(this) << " for a runtime that isn't started";
-    if (result != NULL) {
-      result->SetJ(0);
+    if (IsStatic()) {
+      art::interpreter::EnterInterpreterFromInvoke(self, this, nullptr, args, result);
+    } else {
+      Object* receiver = reinterpret_cast<StackReference<Object>*>(&args[0])->AsMirrorPtr();
+      art::interpreter::EnterInterpreterFromInvoke(self, this, receiver, args + 1, result);
     }
   } else {
     const bool kLogInvocationStartAndReturn = false;
@@ -368,43 +377,5 @@
   RegisterNative(self, GetJniDlsymLookupStub(), false);
 }
 
-const void* ArtMethod::GetOatCodePointer() {
-  if (IsPortableCompiled() || IsNative() || IsAbstract() || IsRuntimeMethod() || IsProxyMethod()) {
-    return nullptr;
-  }
-  Runtime* runtime = Runtime::Current();
-  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(this);
-  // On failure, instead of nullptr we get the quick-to-interpreter-bridge (but not the trampoline).
-  DCHECK(entry_point != GetQuickToInterpreterBridgeTrampoline(runtime->GetClassLinker()));
-  if (entry_point == GetQuickToInterpreterBridge()) {
-    return nullptr;
-  }
-  return EntryPointToCodePointer(entry_point);
-}
-
-const uint8_t* ArtMethod::GetMappingTable() {
-  const void* code = GetOatCodePointer();
-  if (code == nullptr) {
-    return nullptr;
-  }
-  uint32_t offset = reinterpret_cast<const OatMethodHeader*>(code)[-1].mapping_table_offset_;
-  if (UNLIKELY(offset == 0u)) {
-    return nullptr;
-  }
-  return reinterpret_cast<const uint8_t*>(code) - offset;
-}
-
-const uint8_t* ArtMethod::GetVmapTable() {
-  const void* code = GetOatCodePointer();
-  if (code == nullptr) {
-    return nullptr;
-  }
-  uint32_t offset = reinterpret_cast<const OatMethodHeader*>(code)[-1].vmap_table_offset_;
-  if (UNLIKELY(offset == 0u)) {
-    return nullptr;
-  }
-  return reinterpret_cast<const uint8_t*>(code) - offset;
-}
-
 }  // namespace mirror
 }  // namespace art
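
The art_method.cc hunks above share one theme: the quick entry point is resolved once (GetQuickOatEntryPoint) and the mapping table is then looked up relative to that code pointer, instead of asking the instrumentation for the code on every loop iteration. The deleted GetMappingTable/GetVmapTable bodies also show where those tables live: an OatMethodHeader sits immediately before the compiled code and records each table's offset back from the code start. Below is a small self-contained model of just that layout and pointer arithmetic; FakeMethodHeader and the byte image are illustrative stand-ins, not ART types.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <vector>

    // Stand-in for OatMethodHeader; only the fields used by the lookup are modeled.
    struct FakeMethodHeader {
      uint32_t mapping_table_offset_;
      uint32_t vmap_table_offset_;
    };

    int main() {
      // Byte image laid out as: [mapping table (4 bytes)][header][code (8 bytes)].
      std::vector<uint8_t> image(4 + sizeof(FakeMethodHeader) + 8, 0);
      uint8_t* mapping_table = image.data();
      uint8_t* code = image.data() + 4 + sizeof(FakeMethodHeader);

      FakeMethodHeader header;
      header.mapping_table_offset_ = static_cast<uint32_t>(code - mapping_table);
      header.vmap_table_offset_ = 0;  // 0 means "no table", matching the early return above.
      std::memcpy(code - sizeof(FakeMethodHeader), &header, sizeof(header));

      // Recover the table the same way GetMappingTable(code_pointer) does: read the
      // header that precedes the code and subtract the recorded offset.
      const FakeMethodHeader* h = reinterpret_cast<const FakeMethodHeader*>(code) - 1;
      const uint8_t* recovered = (h->mapping_table_offset_ == 0u)
                                     ? nullptr
                                     : code - h->mapping_table_offset_;
      std::printf("recovered mapping table: %s\n",
                  recovered == mapping_table ? "yes" : "no");
      return 0;
    }

The FindCatchBlock change in the same file is independent of this: it now takes a Handle<Class> instead of a SirtRef, and when resolving a catch handler's type itself throws (the NoClassDefFoundError case), it sets *exc_changed and returns immediately so the caller knows to reload the pending exception.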
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index f61a01d..2e8253f 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -23,6 +23,7 @@
 #include "modifiers.h"
 #include "object.h"
 #include "object_callbacks.h"
+#include "quick/quick_method_frame_info.h"
 
 namespace art {
 
@@ -31,7 +32,7 @@
 union JValue;
 struct MethodClassOffsets;
 class MethodHelper;
-class ScopedObjectAccess;
+class ScopedObjectAccessAlreadyRunnable;
 class StringPiece;
 class ShadowFrame;
 
@@ -45,7 +46,8 @@
 // C++ mirror of java.lang.reflect.Method and java.lang.reflect.Constructor
 class MANAGED ArtMethod : public Object {
  public:
-  static ArtMethod* FromReflectedMethod(const ScopedObjectAccess& soa, jobject jlr_method)
+  static ArtMethod* FromReflectedMethod(const ScopedObjectAccessAlreadyRunnable& soa,
+                                        jobject jlr_method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   Class* GetDeclaringClass() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -295,14 +297,20 @@
     return reinterpret_cast<const void*>(code);
   }
 
+  // Actual entry point pointer to compiled oat code or nullptr.
+  const void* GetQuickOatEntryPoint() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Actual pointer to compiled oat code or nullptr.
-  const void* GetOatCodePointer() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const void* GetQuickOatCodePointer() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Callers should wrap the uint8_t* in a MappingTable instance for convenient access.
   const uint8_t* GetMappingTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const uint8_t* GetMappingTable(const void* code_pointer)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Callers should wrap the uint8_t* in a VmapTable instance for convenient access.
   const uint8_t* GetVmapTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const uint8_t* GetVmapTable(const void* code_pointer)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   const uint8_t* GetNativeGcMap() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetFieldPtr<uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, gc_map_));
@@ -318,25 +326,28 @@
 
   template <bool kCheckFrameSize = true>
   uint32_t GetFrameSizeInBytes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    uint32_t result = GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_frame_size_in_bytes_));
+    uint32_t result = GetQuickFrameInfo().FrameSizeInBytes();
     if (kCheckFrameSize) {
       DCHECK_LE(static_cast<size_t>(kStackAlignment), result);
     }
     return result;
   }
 
-  void SetFrameSizeInBytes(size_t new_frame_size_in_bytes)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // Not called within a transaction.
-    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_frame_size_in_bytes_),
-                      new_frame_size_in_bytes);
-  }
+  QuickMethodFrameInfo GetQuickFrameInfo() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  QuickMethodFrameInfo GetQuickFrameInfo(const void* code_pointer)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   size_t GetReturnPcOffsetInBytes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFrameSizeInBytes() - kPointerSize;
+    return GetReturnPcOffsetInBytes(GetFrameSizeInBytes());
   }
 
-  size_t GetSirtOffsetInBytes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  size_t GetReturnPcOffsetInBytes(uint32_t frame_size_in_bytes)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK_EQ(frame_size_in_bytes, GetFrameSizeInBytes());
+    return frame_size_in_bytes - kPointerSize;
+  }
+
+  size_t GetHandleScopeOffsetInBytes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return kPointerSize;
   }
 
@@ -362,26 +373,6 @@
     return OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_);
   }
 
-  uint32_t GetCoreSpillMask() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_core_spill_mask_));
-  }
-
-  void SetCoreSpillMask(uint32_t core_spill_mask) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // Computed during compilation.
-    // Not called within a transaction.
-    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_core_spill_mask_), core_spill_mask);
-  }
-
-  uint32_t GetFpSpillMask() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_fp_spill_mask_));
-  }
-
-  void SetFpSpillMask(uint32_t fp_spill_mask) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // Computed during compilation.
-    // Not called within a transaction.
-    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_fp_spill_mask_), fp_spill_mask);
-  }
-
   // Is this a CalleeSaveMethod or ResolutionMethod and therefore doesn't adhere to normal
   // conventions for a method of managed code. Returns false for Proxy methods.
   bool IsRuntimeMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -394,6 +385,8 @@
   bool IsImtConflictMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   uintptr_t NativePcOffset(const uintptr_t pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uintptr_t NativePcOffset(const uintptr_t pc, const void* quick_entry_point)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Converts a native PC to a dex PC.
   uint32_t ToDexPc(const uintptr_t pc, bool abort_on_failure = true)
@@ -405,15 +398,17 @@
   // Find the catch block for the given exception type and dex_pc. When a catch block is found,
   // indicates whether the found catch block is responsible for clearing the exception or whether
   // a move-exception instruction is present.
-  uint32_t FindCatchBlock(SirtRef<Class>& exception_type, uint32_t dex_pc,
-                          bool* has_no_move_exception)
+  // In the process of finding a catch block we might trigger resolution errors. This is flagged
+  // by exc_changed, which indicates that a different exception is now stored in the thread and
+  // should be reloaded.
+  uint32_t FindCatchBlock(Handle<Class> exception_type, uint32_t dex_pc,
+                          bool* has_no_move_exception, bool* exc_changed)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void SetClass(Class* java_lang_reflect_ArtMethod);
 
-  template <bool kDoReadBarrier = true>
   static Class* GetJavaLangReflectArtMethod() {
-    // This does not need a RB because it is a root.
+    DCHECK(java_lang_reflect_ArtMethod_ != nullptr);
     return java_lang_reflect_ArtMethod_;
   }
 
@@ -428,13 +423,13 @@
   HeapReference<Class> declaring_class_;
 
   // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
-  HeapReference<ObjectArray<ArtMethod> > dex_cache_resolved_methods_;
+  HeapReference<ObjectArray<ArtMethod>> dex_cache_resolved_methods_;
 
   // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
-  HeapReference<ObjectArray<Class> > dex_cache_resolved_types_;
+  HeapReference<ObjectArray<Class>> dex_cache_resolved_types_;
 
   // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
-  HeapReference<ObjectArray<String> > dex_cache_strings_;
+  HeapReference<ObjectArray<String>> dex_cache_strings_;
 
   // Method dispatch from the interpreter invokes this pointer which may cause a bridge into
   // compiled code.
@@ -474,20 +469,6 @@
   // ifTable.
   uint32_t method_index_;
 
-  // --- Quick compiler meta-data. ---
-  // TODO: merge and place in native heap, such as done with the code size.
-
-  // Bit map of spilled machine registers.
-  uint32_t quick_core_spill_mask_;
-
-  // Bit map of spilled floating point machine registers.
-  uint32_t quick_fp_spill_mask_;
-
-  // Fixed frame size for this method when executed.
-  uint32_t quick_frame_size_in_bytes_;
-
-  // --- End of quick compiler meta-data. ---
-
   static Class* java_lang_reflect_ArtMethod_;
 
  private:
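
With quick_core_spill_mask_, quick_fp_spill_mask_ and quick_frame_size_in_bytes_ removed from the ArtMethod object layout, the header now routes all of that data through GetQuickFrameInfo(), which returns a QuickMethodFrameInfo value derived from the compiled code. The sketch below is only meant to show the shape of such a bundle and how the new two-argument GetReturnPcOffsetInBytes uses it; FrameInfo is an illustrative type, not art::QuickMethodFrameInfo, and the mask values are made up.

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    constexpr size_t kPointerSize = sizeof(void*);

    class FrameInfo {
     public:
      constexpr FrameInfo(uint32_t frame_size, uint32_t core_mask, uint32_t fp_mask)
          : frame_size_in_bytes_(frame_size), core_spill_mask_(core_mask), fp_spill_mask_(fp_mask) {}
      uint32_t FrameSizeInBytes() const { return frame_size_in_bytes_; }
      uint32_t CoreSpillMask() const { return core_spill_mask_; }
      uint32_t FpSpillMask() const { return fp_spill_mask_; }

     private:
      uint32_t frame_size_in_bytes_;
      uint32_t core_spill_mask_;
      uint32_t fp_spill_mask_;
    };

    // Mirrors GetReturnPcOffsetInBytes(frame_size): the return PC occupies the last
    // pointer-sized slot of the quick frame.
    size_t ReturnPcOffset(const FrameInfo& info) {
      return info.FrameSizeInBytes() - kPointerSize;
    }

    int main() {
      FrameInfo info(/*frame_size=*/64u, /*core_mask=*/0x4020u, /*fp_mask=*/0u);  // made-up values
      std::printf("frame=%u bytes, core mask=0x%x, return pc at offset %zu\n",
                  info.FrameSizeInBytes(), info.CoreSpillMask(), ReturnPcOffset(info));
    }

Passing the frame size explicitly lets callers that already fetched it (for example while walking a stack) skip a second metadata lookup, while the DCHECK keeps the shortcut consistent with the canonical value.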
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 8a1f383..512a66f 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -21,9 +21,11 @@
 
 #include "art_field.h"
 #include "art_method.h"
+#include "class_linker-inl.h"
 #include "class_loader.h"
 #include "common_throws.h"
 #include "dex_cache.h"
+#include "dex_file.h"
 #include "gc/heap-inl.h"
 #include "iftable.h"
 #include "object_array-inl.h"
@@ -33,8 +35,14 @@
 namespace art {
 namespace mirror {
 
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline uint32_t Class::GetObjectSize() {
-  DCHECK(!IsVariableSize()) << " class=" << PrettyTypeOf(this);
+  if (kIsDebugBuild) {
+    // Use a local variable as (D)CHECK can't handle the space between
+    // the two template params.
+    bool is_variable_size = IsVariableSize<kVerifyFlags, kReadBarrierOption>();
+    CHECK(!is_variable_size) << " class=" << PrettyTypeOf(this);
+  }
   return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, object_size_));
 }
 
@@ -56,12 +64,12 @@
 
 inline ObjectArray<ArtMethod>* Class::GetDirectMethods() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_));
+  return GetFieldObject<ObjectArray<ArtMethod>>(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_));
 }
 
 inline void Class::SetDirectMethods(ObjectArray<ArtMethod>* new_direct_methods)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(NULL == GetFieldObject<ObjectArray<ArtMethod> >(
+  DCHECK(NULL == GetFieldObject<ObjectArray<ArtMethod>>(
       OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_)));
   DCHECK_NE(0, new_direct_methods->GetLength());
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_), new_direct_methods);
@@ -74,7 +82,7 @@
 inline void Class::SetDirectMethod(uint32_t i, ArtMethod* f)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ObjectArray<ArtMethod>* direct_methods =
-      GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_));
+      GetFieldObject<ObjectArray<ArtMethod>>(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_));
   direct_methods->Set<false>(i, f);
 }
 
@@ -86,7 +94,7 @@
 template<VerifyObjectFlags kVerifyFlags>
 inline ObjectArray<ArtMethod>* Class::GetVirtualMethods() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_));
+  return GetFieldObject<ObjectArray<ArtMethod>>(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_));
 }
 
 inline void Class::SetVirtualMethods(ObjectArray<ArtMethod>* new_virtual_methods) {
@@ -114,18 +122,18 @@
 inline void Class::SetVirtualMethod(uint32_t i, ArtMethod* f)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ObjectArray<ArtMethod>* virtual_methods =
-      GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_));
+      GetFieldObject<ObjectArray<ArtMethod>>(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_));
   virtual_methods->Set<false>(i, f);
 }
 
 inline ObjectArray<ArtMethod>* Class::GetVTable() {
   DCHECK(IsResolved() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, vtable_));
+  return GetFieldObject<ObjectArray<ArtMethod>>(OFFSET_OF_OBJECT_MEMBER(Class, vtable_));
 }
 
 inline ObjectArray<ArtMethod>* Class::GetVTableDuringLinking() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, vtable_));
+  return GetFieldObject<ObjectArray<ArtMethod>>(OFFSET_OF_OBJECT_MEMBER(Class, vtable_));
 }
 
 inline void Class::SetVTable(ObjectArray<ArtMethod>* new_vtable) {
@@ -133,7 +141,7 @@
 }
 
 inline ObjectArray<ArtMethod>* Class::GetImTable() {
-  return GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, imtable_));
+  return GetFieldObject<ObjectArray<ArtMethod>>(OFFSET_OF_OBJECT_MEMBER(Class, imtable_));
 }
 
 inline void Class::SetImTable(ObjectArray<ArtMethod>* new_imtable) {
@@ -354,18 +362,18 @@
 
 inline void Class::SetIFields(ObjectArray<ArtField>* new_ifields)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField> >(OFFSET_OF_OBJECT_MEMBER(Class, ifields_)));
+  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField>>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_)));
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_), new_ifields);
 }
 
 inline ObjectArray<ArtField>* Class::GetSFields() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtField> >(OFFSET_OF_OBJECT_MEMBER(Class, sfields_));
+  return GetFieldObject<ObjectArray<ArtField>>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_));
 }
 
 inline void Class::SetSFields(ObjectArray<ArtField>* new_sfields)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField> >(OFFSET_OF_OBJECT_MEMBER(Class, sfields_)));
+  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField>>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_)));
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_), new_sfields);
 }
 
@@ -381,7 +389,7 @@
 
 inline void Class::SetStaticField(uint32_t i, ArtField* f)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ObjectArray<ArtField>* sfields= GetFieldObject<ObjectArray<ArtField> >(
+  ObjectArray<ArtField>* sfields= GetFieldObject<ObjectArray<ArtField>>(
       OFFSET_OF_OBJECT_MEMBER(Class, sfields_));
   sfields->Set<false>(i, f);
 }
@@ -397,7 +405,7 @@
 
 inline void Class::SetInstanceField(uint32_t i, ArtField* f)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ObjectArray<ArtField>* ifields= GetFieldObject<ObjectArray<ArtField> >(
+  ObjectArray<ArtField>* ifields= GetFieldObject<ObjectArray<ArtField>>(
       OFFSET_OF_OBJECT_MEMBER(Class, ifields_));
   ifields->Set<false>(i, f);
 }
@@ -470,12 +478,21 @@
   DCHECK_GE(this->object_size_, sizeof(Object));
 }
 
-template <bool kIsInstrumented>
+template<bool kIsInstrumented, bool kCheckAddFinalizer>
 inline Object* Class::Alloc(Thread* self, gc::AllocatorType allocator_type) {
   CheckObjectAlloc();
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  return heap->AllocObjectWithAllocator<kIsInstrumented, false>(self, this, this->object_size_,
-                                                                allocator_type, VoidFunctor());
+  const bool add_finalizer = kCheckAddFinalizer && IsFinalizable();
+  if (!kCheckAddFinalizer) {
+    DCHECK(!IsFinalizable());
+  }
+  mirror::Object* obj =
+      heap->AllocObjectWithAllocator<kIsInstrumented, false>(self, this, this->object_size_,
+                                                             allocator_type, VoidFunctor());
+  if (add_finalizer && LIKELY(obj != nullptr)) {
+    heap->AddFinalizerReference(self, &obj);
+  }
+  return obj;
 }
 
 inline Object* Class::AllocObject(Thread* self) {
@@ -492,17 +509,37 @@
   VisitStaticFieldsReferences<kVisitClass>(this, visitor);
 }
 
-template<bool kDoReadBarrier>
-bool Class::IsArtFieldClass() {
-  Class* java_lang_Class = GetClass<kVerifyNone, kDoReadBarrier>();
-  Class* java_lang_reflect_ArtField =
-      java_lang_Class->GetInstanceField(0)->GetClass<kVerifyNone, kDoReadBarrier>();
-  return this == java_lang_reflect_ArtField;
+inline bool Class::IsArtFieldClass() const {
+  return this == ArtField::GetJavaLangReflectArtField();
 }
 
-template<bool kDoReadBarrier>
-bool Class::IsArtMethodClass() {
-  return this == ArtMethod::GetJavaLangReflectArtMethod<kDoReadBarrier>();
+inline bool Class::IsArtMethodClass() const {
+  return this == ArtMethod::GetJavaLangReflectArtMethod();
+}
+
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
+inline bool Class::IsClassClass() {
+  Class* java_lang_Class = GetClass<kVerifyFlags, kReadBarrierOption>()->
+      template GetClass<kVerifyFlags, kReadBarrierOption>();
+  return this == java_lang_Class;
+}
+
+inline const DexFile& Class::GetDexFile() {
+  return *GetDexCache()->GetDexFile();
+}
+
+inline bool Class::DescriptorEquals(const char* match) {
+  if (UNLIKELY(IsArrayClass())) {
+    return match[0] == '[' && GetComponentType()->DescriptorEquals(match + 1);
+  } else if (UNLIKELY(IsPrimitive())) {
+    return strcmp(Primitive::Descriptor(GetPrimitiveType()), match) == 0;
+  } else if (UNLIKELY(IsProxyClass())) {
+    return Runtime::Current()->GetClassLinker()->GetDescriptorForProxy(this) == match;
+  } else {
+    const DexFile& dex_file = GetDexFile();
+    const DexFile::TypeId& type_id = dex_file.GetTypeId(GetClassDef()->class_idx_);
+    return strcmp(dex_file.GetTypeDescriptor(type_id), match) == 0;
+  }
 }
 
 }  // namespace mirror
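
Class::Alloc gains a second template flag in this file, kCheckAddFinalizer. Call sites that can prove at compile time that the class is not finalizable instantiate Alloc<kIsInstrumented, false> and skip the finalizer bookkeeping entirely; the default path registers a finalizer reference right after a successful allocation. A simplified, self-contained model of that control flow follows; the Klass/Object types and the finalizer_refs vector are stand-ins for ART's heap machinery.

    #include <cassert>
    #include <cstdio>
    #include <vector>

    struct Object {};
    struct Klass { bool finalizable; };

    std::vector<Object*> finalizer_refs;  // stands in for heap->AddFinalizerReference

    template <bool kCheckAddFinalizer = true>
    Object* Alloc(const Klass& klass) {
      const bool add_finalizer = kCheckAddFinalizer && klass.finalizable;
      if (!kCheckAddFinalizer) {
        assert(!klass.finalizable);  // the caller promised this statically
      }
      Object* obj = new Object();
      if (add_finalizer && obj != nullptr) {  // null check kept to mirror the LIKELY guard above
        finalizer_refs.push_back(obj);
      }
      return obj;
    }

    int main() {
      Klass plain{false};
      Klass finalizable{true};
      Object* a = Alloc(plain);        // checked path, nothing registered
      Object* b = Alloc(finalizable);  // checked path, registered
      Object* c = Alloc<false>(plain); // caller-verified fast path, check skipped
      std::printf("registered finalizable objects: %zu\n", finalizer_refs.size());
      delete a; delete b; delete c;
    }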
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 662303e..4869b45 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -28,7 +28,7 @@
 #include "object_array-inl.h"
 #include "object_utils.h"
 #include "runtime.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 #include "thread.h"
 #include "throwable.h"
 #include "utils.h"
@@ -77,20 +77,13 @@
         << "Attempt to set as erroneous an already erroneous class " << PrettyClass(this);
 
     // Stash current exception.
-    SirtRef<mirror::Object> old_throw_this_object(self, NULL);
-    SirtRef<mirror::ArtMethod> old_throw_method(self, NULL);
-    SirtRef<mirror::Throwable> old_exception(self, NULL);
-    uint32_t old_throw_dex_pc;
-    {
-      ThrowLocation old_throw_location;
-      mirror::Throwable* old_exception_obj = self->GetException(&old_throw_location);
-      old_throw_this_object.reset(old_throw_location.GetThis());
-      old_throw_method.reset(old_throw_location.GetMethod());
-      old_exception.reset(old_exception_obj);
-      old_throw_dex_pc = old_throw_location.GetDexPc();
-      self->ClearException();
-    }
-    CHECK(old_exception.get() != NULL);
+    StackHandleScope<3> hs(self);
+    ThrowLocation old_throw_location;
+    Handle<mirror::Throwable> old_exception(hs.NewHandle(self->GetException(&old_throw_location)));
+    CHECK(old_exception.Get() != nullptr);
+    Handle<mirror::Object> old_throw_this_object(hs.NewHandle(old_throw_location.GetThis()));
+    Handle<mirror::ArtMethod> old_throw_method(hs.NewHandle(old_throw_location.GetMethod()));
+    uint32_t old_throw_dex_pc = old_throw_location.GetDexPc();
 
     // clear exception to call FindSystemClass
     self->ClearException();
@@ -107,10 +100,10 @@
     }
 
     // Restore exception.
-    ThrowLocation gc_safe_throw_location(old_throw_this_object.get(), old_throw_method.get(),
+    ThrowLocation gc_safe_throw_location(old_throw_this_object.Get(), old_throw_method.Get(),
                                          old_throw_dex_pc);
 
-    self->SetException(gc_safe_throw_location, old_exception.get());
+    self->SetException(gc_safe_throw_location, old_exception.Get());
   }
   CHECK(sizeof(Status) == sizeof(uint32_t)) << PrettyClass(this);
   if (Runtime::Current()->IsActiveTransaction()) {
@@ -143,14 +136,13 @@
 // Class.getName: keywords for primitive types, regular "[I" form for primitive arrays (so "int"
 // but "[I"), and arrays of reference types written between "L" and ";" but with dots rather than
 // slashes (so "java.lang.String" but "[Ljava.lang.String;"). Madness.
-String* Class::ComputeName() {
-  String* name = GetName();
+String* Class::ComputeName(Handle<Class> h_this) {
+  String* name = h_this->GetName();
   if (name != nullptr) {
     return name;
   }
+  std::string descriptor(h_this->GetDescriptor());
   Thread* self = Thread::Current();
-  SirtRef<mirror::Class> sirt_c(self, this);
-  std::string descriptor(ClassHelper(this).GetDescriptor());
   if ((descriptor[0] != 'L') && (descriptor[0] != '[')) {
     // The descriptor indicates that this is the class for
     // a primitive type; special-case the return value.
@@ -179,7 +171,7 @@
     std::replace(descriptor.begin(), descriptor.end(), '/', '.');
     name = String::AllocFromModifiedUtf8(self, descriptor.c_str());
   }
-  sirt_c->SetName(name);
+  h_this->SetName(name);
   return name;
 }
 
@@ -196,52 +188,59 @@
     return;
   }
 
-  Class* super = GetSuperClass();
-  ClassHelper kh(this);
+  Thread* self = Thread::Current();
+  StackHandleScope<2> hs(self);
+  Handle<mirror::Class> h_this(hs.NewHandle(this));
+  Handle<mirror::Class> h_super(hs.NewHandle(GetSuperClass()));
+
   os << "----- " << (IsInterface() ? "interface" : "class") << " "
-     << "'" << kh.GetDescriptor() << "' cl=" << GetClassLoader() << " -----\n",
+     << "'" << GetDescriptor() << "' cl=" << GetClassLoader() << " -----\n",
   os << "  objectSize=" << SizeOf() << " "
-     << "(" << (super != NULL ? super->SizeOf() : -1) << " from super)\n",
+     << "(" << (h_super.Get() != NULL ? h_super->SizeOf() : -1) << " from super)\n",
   os << StringPrintf("  access=0x%04x.%04x\n",
       GetAccessFlags() >> 16, GetAccessFlags() & kAccJavaFlagsMask);
-  if (super != NULL) {
-    os << "  super='" << PrettyClass(super) << "' (cl=" << super->GetClassLoader() << ")\n";
+  if (h_super.Get() != NULL) {
+    os << "  super='" << PrettyClass(h_super.Get()) << "' (cl=" << h_super->GetClassLoader()
+       << ")\n";
   }
   if (IsArrayClass()) {
     os << "  componentType=" << PrettyClass(GetComponentType()) << "\n";
   }
-  if (kh.NumDirectInterfaces() > 0) {
-    os << "  interfaces (" << kh.NumDirectInterfaces() << "):\n";
-    for (size_t i = 0; i < kh.NumDirectInterfaces(); ++i) {
-      Class* interface = kh.GetDirectInterface(i);
+  const size_t num_direct_interfaces = NumDirectInterfaces();
+  if (num_direct_interfaces > 0) {
+    os << "  interfaces (" << num_direct_interfaces << "):\n";
+    for (size_t i = 0; i < num_direct_interfaces; ++i) {
+      Class* interface = GetDirectInterface(self, h_this, i);
       const ClassLoader* cl = interface->GetClassLoader();
       os << StringPrintf("    %2zd: %s (cl=%p)\n", i, PrettyClass(interface).c_str(), cl);
     }
   }
-  os << "  vtable (" << NumVirtualMethods() << " entries, "
-     << (super != NULL ? super->NumVirtualMethods() : 0) << " in super):\n";
+  // After this point, this may have moved due to GetDirectInterface.
+  os << "  vtable (" << h_this->NumVirtualMethods() << " entries, "
+     << (h_super.Get() != NULL ? h_super->NumVirtualMethods() : 0) << " in super):\n";
   for (size_t i = 0; i < NumVirtualMethods(); ++i) {
-    os << StringPrintf("    %2zd: %s\n", i, PrettyMethod(GetVirtualMethodDuringLinking(i)).c_str());
+    os << StringPrintf("    %2zd: %s\n", i,
+                       PrettyMethod(h_this->GetVirtualMethodDuringLinking(i)).c_str());
   }
-  os << "  direct methods (" << NumDirectMethods() << " entries):\n";
-  for (size_t i = 0; i < NumDirectMethods(); ++i) {
-    os << StringPrintf("    %2zd: %s\n", i, PrettyMethod(GetDirectMethod(i)).c_str());
+  os << "  direct methods (" << h_this->NumDirectMethods() << " entries):\n";
+  for (size_t i = 0; i < h_this->NumDirectMethods(); ++i) {
+    os << StringPrintf("    %2zd: %s\n", i, PrettyMethod(h_this->GetDirectMethod(i)).c_str());
   }
-  if (NumStaticFields() > 0) {
-    os << "  static fields (" << NumStaticFields() << " entries):\n";
-    if (IsResolved() || IsErroneous()) {
-      for (size_t i = 0; i < NumStaticFields(); ++i) {
-        os << StringPrintf("    %2zd: %s\n", i, PrettyField(GetStaticField(i)).c_str());
+  if (h_this->NumStaticFields() > 0) {
+    os << "  static fields (" << h_this->NumStaticFields() << " entries):\n";
+    if (h_this->IsResolved() || h_this->IsErroneous()) {
+      for (size_t i = 0; i < h_this->NumStaticFields(); ++i) {
+        os << StringPrintf("    %2zd: %s\n", i, PrettyField(h_this->GetStaticField(i)).c_str());
       }
     } else {
       os << "    <not yet available>";
     }
   }
-  if (NumInstanceFields() > 0) {
-    os << "  instance fields (" << NumInstanceFields() << " entries):\n";
-    if (IsResolved() || IsErroneous()) {
-      for (size_t i = 0; i < NumInstanceFields(); ++i) {
-        os << StringPrintf("    %2zd: %s\n", i, PrettyField(GetInstanceField(i)).c_str());
+  if (h_this->NumInstanceFields() > 0) {
+    os << "  instance fields (" << h_this->NumInstanceFields() << " entries):\n";
+    if (h_this->IsResolved() || h_this->IsErroneous()) {
+      for (size_t i = 0; i < h_this->NumInstanceFields(); ++i) {
+        os << StringPrintf("    %2zd: %s\n", i, PrettyField(h_this->GetInstanceField(i)).c_str());
       }
     } else {
       os << "    <not yet available>";
@@ -311,13 +310,7 @@
     return true;
   }
   // Compare the package part of the descriptor string.
-  return IsInSamePackage(ClassHelper(klass1).GetDescriptor(),
-                         ClassHelper(klass2).GetDescriptor());
-}
-
-bool Class::IsClassClass() {
-  Class* java_lang_Class = GetClass()->GetClass();
-  return this == java_lang_Class;
+  return IsInSamePackage(klass1->GetDescriptor().c_str(), klass2->GetDescriptor().c_str());
 }
 
 bool Class::IsStringClass() const {
@@ -596,71 +589,82 @@
   return NULL;
 }
 
-ArtField* Class::FindStaticField(const StringPiece& name, const StringPiece& type) {
+ArtField* Class::FindStaticField(Thread* self, Handle<Class> klass, const StringPiece& name,
+                                 const StringPiece& type) {
   // Is the field in this class (or its interfaces), or any of its
   // superclasses (or their interfaces)?
-  for (Class* k = this; k != NULL; k = k->GetSuperClass()) {
+  for (Class* k = klass.Get(); k != nullptr; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredStaticField(name, type);
-    if (f != NULL) {
+    if (f != nullptr) {
       return f;
     }
+    // Wrap k in case it moves during GetDirectInterface.
+    StackHandleScope<1> hs(self);
+    HandleWrapper<mirror::Class> h_k(hs.NewHandleWrapper(&k));
     // Is this field in any of this class' interfaces?
-    ClassHelper kh(k);
-    for (uint32_t i = 0; i < kh.NumDirectInterfaces(); ++i) {
-      Class* interface = kh.GetDirectInterface(i);
-      f = interface->FindStaticField(name, type);
-      if (f != NULL) {
+    for (uint32_t i = 0; i < h_k->NumDirectInterfaces(); ++i) {
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> interface(hs.NewHandle(GetDirectInterface(self, h_k, i)));
+      f = FindStaticField(self, interface, name, type);
+      if (f != nullptr) {
         return f;
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
-ArtField* Class::FindStaticField(const DexCache* dex_cache, uint32_t dex_field_idx) {
-  for (Class* k = this; k != NULL; k = k->GetSuperClass()) {
+ArtField* Class::FindStaticField(Thread* self, Handle<Class> klass, const DexCache* dex_cache,
+                                 uint32_t dex_field_idx) {
+  for (Class* k = klass.Get(); k != nullptr; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredStaticField(dex_cache, dex_field_idx);
     if (f != NULL) {
       return f;
     }
+    // Wrap k in case it moves during GetDirectInterface.
+    StackHandleScope<1> hs(self);
+    HandleWrapper<mirror::Class> h_k(hs.NewHandleWrapper(&k));
     // Is this field in any of this class' interfaces?
-    ClassHelper kh(k);
-    for (uint32_t i = 0; i < kh.NumDirectInterfaces(); ++i) {
-      Class* interface = kh.GetDirectInterface(i);
-      f = interface->FindStaticField(dex_cache, dex_field_idx);
-      if (f != NULL) {
+    for (uint32_t i = 0; i < h_k->NumDirectInterfaces(); ++i) {
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> interface(hs.NewHandle(GetDirectInterface(self, h_k, i)));
+      f = FindStaticField(self, interface, dex_cache, dex_field_idx);
+      if (f != nullptr) {
         return f;
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
-ArtField* Class::FindField(const StringPiece& name, const StringPiece& type) {
+ArtField* Class::FindField(Thread* self, Handle<Class> klass, const StringPiece& name,
+                           const StringPiece& type) {
   // Find a field using the JLS field resolution order
-  for (Class* k = this; k != NULL; k = k->GetSuperClass()) {
+  for (Class* k = klass.Get(); k != NULL; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredInstanceField(name, type);
-    if (f != NULL) {
+    if (f != nullptr) {
       return f;
     }
     f = k->FindDeclaredStaticField(name, type);
-    if (f != NULL) {
+    if (f != nullptr) {
       return f;
     }
     // Is this field in any of this class' interfaces?
-    ClassHelper kh(k);
-    for (uint32_t i = 0; i < kh.NumDirectInterfaces(); ++i) {
-      Class* interface = kh.GetDirectInterface(i);
-      f = interface->FindStaticField(name, type);
-      if (f != NULL) {
+    StackHandleScope<1> hs(self);
+    HandleWrapper<mirror::Class> h_k(hs.NewHandleWrapper(&k));
+    for (uint32_t i = 0; i < h_k->NumDirectInterfaces(); ++i) {
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> interface(hs.NewHandle(GetDirectInterface(self, h_k, i)));
+      f = interface->FindStaticField(self, interface, name, type);
+      if (f != nullptr) {
         return f;
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 static void SetPreverifiedFlagOnMethods(mirror::ObjectArray<mirror::ArtMethod>* methods)
@@ -682,5 +686,111 @@
   SetPreverifiedFlagOnMethods(GetVirtualMethods());
 }
 
+std::string Class::GetDescriptor() {
+  if (UNLIKELY(IsArrayClass())) {
+    return GetArrayDescriptor();
+  } else if (UNLIKELY(IsPrimitive())) {
+    return Primitive::Descriptor(GetPrimitiveType());
+  } else if (UNLIKELY(IsProxyClass())) {
+    return Runtime::Current()->GetClassLinker()->GetDescriptorForProxy(this);
+  } else {
+    const DexFile& dex_file = GetDexFile();
+    const DexFile::TypeId& type_id = dex_file.GetTypeId(GetClassDef()->class_idx_);
+    return dex_file.GetTypeDescriptor(type_id);
+  }
+}
+
+std::string Class::GetArrayDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return "[" + GetComponentType()->GetDescriptor();
+}
+
+const DexFile::ClassDef* Class::GetClassDef() {
+  uint16_t class_def_idx = GetDexClassDefIndex();
+  if (class_def_idx == DexFile::kDexNoIndex16) {
+    return nullptr;
+  }
+  return &GetDexFile().GetClassDef(class_def_idx);
+}
+
+uint32_t Class::NumDirectInterfaces() {
+  if (IsPrimitive()) {
+    return 0;
+  } else if (IsArrayClass()) {
+    return 2;
+  } else if (IsProxyClass()) {
+    mirror::SynthesizedProxyClass* proxy_class=
+        reinterpret_cast<mirror::SynthesizedProxyClass*>(this);
+    mirror::ObjectArray<mirror::Class>* interfaces = proxy_class->GetInterfaces();
+    return interfaces != nullptr ? interfaces->GetLength() : 0;
+  } else {
+    const DexFile::TypeList* interfaces = GetInterfaceTypeList();
+    if (interfaces == nullptr) {
+      return 0;
+    } else {
+      return interfaces->Size();
+    }
+  }
+}
+
+uint16_t Class::GetDirectInterfaceTypeIdx(uint32_t idx) {
+  DCHECK(!IsPrimitive());
+  DCHECK(!IsArrayClass());
+  return GetInterfaceTypeList()->GetTypeItem(idx).type_idx_;
+}
+
+mirror::Class* Class::GetDirectInterface(Thread* self, Handle<mirror::Class> klass, uint32_t idx) {
+  DCHECK(klass.Get() != nullptr);
+  DCHECK(!klass->IsPrimitive());
+  if (klass->IsArrayClass()) {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    if (idx == 0) {
+      return class_linker->FindSystemClass(self, "Ljava/lang/Cloneable;");
+    } else {
+      DCHECK_EQ(1U, idx);
+      return class_linker->FindSystemClass(self, "Ljava/io/Serializable;");
+    }
+  } else if (klass->IsProxyClass()) {
+    mirror::SynthesizedProxyClass* proxy_class =
+        reinterpret_cast<mirror::SynthesizedProxyClass*>(klass.Get());
+    mirror::ObjectArray<mirror::Class>* interfaces = proxy_class->GetInterfaces();
+    DCHECK(interfaces != nullptr);
+    return interfaces->Get(idx);
+  } else {
+    uint16_t type_idx = klass->GetDirectInterfaceTypeIdx(idx);
+    mirror::Class* interface = klass->GetDexCache()->GetResolvedType(type_idx);
+    if (interface == nullptr) {
+      interface = Runtime::Current()->GetClassLinker()->ResolveType(klass->GetDexFile(), type_idx,
+                                                                    klass.Get());
+      CHECK(interface != nullptr || self->IsExceptionPending());
+    }
+    return interface;
+  }
+}
+
+const char* Class::GetSourceFile() {
+  std::string descriptor(GetDescriptor());
+  const DexFile& dex_file = GetDexFile();
+  const DexFile::ClassDef* dex_class_def = GetClassDef();
+  CHECK(dex_class_def != nullptr) << "No class def for class " << PrettyClass(this);
+  return dex_file.GetSourceFile(*dex_class_def);
+}
+
+std::string Class::GetLocation() {
+  mirror::DexCache* dex_cache = GetDexCache();
+  if (dex_cache != nullptr && !IsProxyClass()) {
+    return dex_cache->GetLocation()->ToModifiedUtf8();
+  }
+  // Arrays and proxies are generated and have no corresponding dex file location.
+  return "generated class";
+}
+
+const DexFile::TypeList* Class::GetInterfaceTypeList() {
+  const DexFile::ClassDef* class_def = GetClassDef();
+  if (class_def == nullptr) {
+    return nullptr;
+  }
+  return GetDexFile().GetInterfacesList(*class_def);
+}
+
 }  // namespace mirror
 }  // namespace art
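
The FindStaticField/FindField rewrites above make the methods static, thread the starting class through a Handle, and wrap the loop variable k in a HandleWrapper before calling GetDirectInterface, because interface resolution can allocate and therefore trigger a moving GC. The sketch below models only that wrapper idea: the possibly-relocated pointer is written back into the local variable when the wrapper leaves scope. PointerWrapper is an illustrative class, not ART's HandleWrapper, and Relocate() stands in for a GC moving the object.

    #include <cstdio>

    struct Obj { int id; };

    class PointerWrapper {
     public:
      explicit PointerWrapper(Obj** slot) : slot_(slot), current_(*slot) {}
      ~PointerWrapper() { *slot_ = current_; }   // write the possibly-moved value back
      Obj* operator->() const { return current_; }
      void Relocate(Obj* new_location) { current_ = new_location; }  // simulates a moving GC
     private:
      Obj** slot_;
      Obj* current_;
    };

    int main() {
      Obj original{1};
      Obj moved_copy{1};
      Obj* k = &original;
      {
        PointerWrapper h_k(&k);
        h_k.Relocate(&moved_copy);  // pretend a GC relocated the object while we held the wrapper
        std::printf("inside scope, id=%d\n", h_k->id);
      }
      std::printf("k now points at the moved copy: %d\n", k == &moved_copy);
    }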
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 23211c2..40c9975 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_MIRROR_CLASS_H_
 #define ART_RUNTIME_MIRROR_CLASS_H_
 
+#include "dex_file.h"
 #include "gc/allocator_type.h"
 #include "invoke_type.h"
 #include "modifiers.h"
@@ -274,7 +275,7 @@
   String* GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);  // Returns the cached name.
   void SetName(String* name) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);  // Sets the cached name.
   // Computes the name, then sets the cached value.
-  String* ComputeName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static String* ComputeName(Handle<Class> h_this) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool IsProxyClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -360,30 +361,32 @@
     return depth;
   }
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsArrayClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetComponentType<kVerifyFlags, kDoReadBarrier>() != NULL;
+    return GetComponentType<kVerifyFlags, kReadBarrierOption>() != NULL;
   }
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsClassClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsStringClass() const;
+  bool IsStringClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsThrowableClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<bool kDoReadBarrier = true>
-  bool IsArtFieldClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsArtFieldClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<bool kDoReadBarrier = true>
-  bool IsArtMethodClass();
+  bool IsArtMethodClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static MemberOffset ComponentTypeOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Class, component_type_);
   }
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Class* GetComponentType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject<Class, kVerifyFlags, kDoReadBarrier>(ComponentTypeOffset());
+    return GetFieldObject<Class, kVerifyFlags, kReadBarrierOption>(ComponentTypeOffset());
   }
 
   void SetComponentType(Class* new_component_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -393,10 +396,10 @@
     SetFieldObject<false, false>(ComponentTypeOffset(), new_component_type);
   }
 
-  template<bool kDoReadBarrier = true>
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   size_t GetComponentSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return Primitive::ComponentSize(
-        GetComponentType<kDefaultVerifyFlags, kDoReadBarrier>()->GetPrimitiveType());
+        GetComponentType<kDefaultVerifyFlags, kReadBarrierOption>()->GetPrimitiveType());
   }
 
   bool IsObjectClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -412,7 +415,7 @@
   }
 
   // Creates a raw object instance but does not invoke the default constructor.
-  template <bool kIsInstrumented>
+  template<bool kIsInstrumented, bool kCheckAddFinalizer = true>
   ALWAYS_INLINE Object* Alloc(Thread* self, gc::AllocatorType allocator_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -421,13 +424,17 @@
   Object* AllocNonMovableObject(Thread* self)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsVariableSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Classes and arrays vary in size, and so the object_size_ field cannot
-    // be used to get their instance size
-    return IsClassClass() || IsArrayClass();
+    // be used to Get their instance size
+    return IsClassClass<kVerifyFlags, kReadBarrierOption>() ||
+        IsArrayClass<kVerifyFlags, kReadBarrierOption>();
   }
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   uint32_t SizeOf() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, class_size_));
   }
@@ -440,6 +447,8 @@
   void SetClassSize(uint32_t new_class_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   uint32_t GetObjectSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetObjectSize(uint32_t new_object_size) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -766,7 +775,8 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Find a static or instance field using the JLS resolution order
-  ArtField* FindField(const StringPiece& name, const StringPiece& type)
+  static ArtField* FindField(Thread* self, Handle<Class> klass, const StringPiece& name,
+                             const StringPiece& type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Finds the given instance field in this class or a superclass.
@@ -785,12 +795,14 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Finds the given static field in this class or a superclass.
-  ArtField* FindStaticField(const StringPiece& name, const StringPiece& type)
+  static ArtField* FindStaticField(Thread* self, Handle<Class> klass, const StringPiece& name,
+                                   const StringPiece& type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Finds the given static field in this class or superclass, only searches classes that
   // have the same dex cache.
-  ArtField* FindStaticField(const DexCache* dex_cache, uint32_t dex_field_idx)
+  static ArtField* FindStaticField(Thread* self, Handle<Class> klass, const DexCache* dex_cache,
+                                   uint32_t dex_field_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ArtField* FindDeclaredStaticField(const StringPiece& name, const StringPiece& type)
@@ -847,6 +859,19 @@
   void VisitReferences(mirror::Class* klass, const Visitor& visitor)
       NO_THREAD_SAFETY_ANALYSIS;
 
+  std::string GetDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool DescriptorEquals(const char* match) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  std::string GetArrayDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const DexFile::ClassDef* GetClassDef() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint32_t NumDirectInterfaces() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint16_t GetDirectInterfaceTypeIdx(uint32_t idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static mirror::Class* GetDirectInterface(Thread* self, Handle<mirror::Class> klass, uint32_t idx)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const char* GetSourceFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  std::string GetLocation() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const DexFile& GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const DexFile::TypeList* GetInterfaceTypeList() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
  private:
   void SetVerifyErrorClass(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -877,7 +902,7 @@
   HeapReference<DexCache> dex_cache_;
 
   // static, private, and <init> methods
-  HeapReference<ObjectArray<ArtMethod> > direct_methods_;
+  HeapReference<ObjectArray<ArtMethod>> direct_methods_;
 
   // instance fields
   //
@@ -889,7 +914,7 @@
   // All instance fields that refer to objects are guaranteed to be at
   // the beginning of the field list.  num_reference_instance_fields_
   // specifies the number of reference fields.
-  HeapReference<ObjectArray<ArtField> > ifields_;
+  HeapReference<ObjectArray<ArtField>> ifields_;
 
   // The interface table (iftable_) contains pairs of a interface class and an array of the
   // interface methods. There is one pair per interface supported by this class.  That means one
@@ -905,7 +930,7 @@
   HeapReference<IfTable> iftable_;
 
   // Interface method table (imt), for quick "invoke-interface".
-  HeapReference<ObjectArray<ArtMethod> > imtable_;
+  HeapReference<ObjectArray<ArtMethod>> imtable_;
 
   // Descriptor for the class such as "java.lang.Class" or "[C". Lazily initialized by ComputeName
   HeapReference<String> name_;
@@ -920,13 +945,13 @@
   HeapReference<Class> verify_error_class_;
 
   // Virtual methods defined in this class; invoked through vtable.
-  HeapReference<ObjectArray<ArtMethod> > virtual_methods_;
+  HeapReference<ObjectArray<ArtMethod>> virtual_methods_;
 
   // Virtual method table (vtable), for use by "invoke-virtual".  The vtable from the superclass is
   // copied in, and virtual methods from our class either replace those from the super or are
   // appended. For abstract classes, methods may be created in the vtable that aren't in
   // virtual_ methods_ for miranda methods.
-  HeapReference<ObjectArray<ArtMethod> > vtable_;
+  HeapReference<ObjectArray<ArtMethod>> vtable_;
 
   // Access flags; low 16 bits are defined by VM spec.
   uint32_t access_flags_;
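
class.h now declares the descriptor helpers (GetDescriptor, DescriptorEquals, GetArrayDescriptor) directly on Class, replacing the old ClassHelper detour. The dispatch they implement is straightforward: array classes prepend '[' and recurse on the component type, primitives map to a fixed descriptor, proxies ask the class linker, and ordinary classes read theirs from the dex file. Here is a self-contained illustration of that dispatch with simplified stand-in types; FakeClass is not an ART type and the proxy case is omitted.

    #include <cstdio>
    #include <cstring>
    #include <string>

    struct FakeClass {
      const FakeClass* component;        // non-null only for array classes
      const char* primitive_descriptor;  // e.g. "I"; non-null only for primitives
      std::string dex_descriptor;        // e.g. "Ljava/lang/String;"
    };

    std::string GetDescriptor(const FakeClass& k) {
      if (k.component != nullptr) return "[" + GetDescriptor(*k.component);
      if (k.primitive_descriptor != nullptr) return k.primitive_descriptor;
      return k.dex_descriptor;
    }

    // DescriptorEquals avoids building the array descriptor string: it peels one '['
    // per dimension and recurses on the component type, as in class-inl.h above.
    bool DescriptorEquals(const FakeClass& k, const char* match) {
      if (k.component != nullptr) return match[0] == '[' && DescriptorEquals(*k.component, match + 1);
      if (k.primitive_descriptor != nullptr) return std::strcmp(k.primitive_descriptor, match) == 0;
      return k.dex_descriptor == match;
    }

    int main() {
      FakeClass string_class{nullptr, nullptr, "Ljava/lang/String;"};
      FakeClass string_array{&string_class, nullptr, ""};
      std::printf("%s matches: %d\n", GetDescriptor(string_array).c_str(),
                  DescriptorEquals(string_array, "[Ljava/lang/String;"));
    }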
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 11a4002..65a5026 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -123,22 +123,22 @@
   }
 
   ObjectArray<String>* GetStrings() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject< ObjectArray<String> >(StringsOffset());
+    return GetFieldObject< ObjectArray<String>>(StringsOffset());
   }
 
   ObjectArray<Class>* GetResolvedTypes() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject<ObjectArray<Class> >(
+    return GetFieldObject<ObjectArray<Class>>(
         OFFSET_OF_OBJECT_MEMBER(DexCache, resolved_types_));
   }
 
   ObjectArray<ArtMethod>* GetResolvedMethods() ALWAYS_INLINE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject< ObjectArray<ArtMethod> >(ResolvedMethodsOffset());
+    return GetFieldObject< ObjectArray<ArtMethod>>(ResolvedMethodsOffset());
   }
 
   ObjectArray<ArtField>* GetResolvedFields() ALWAYS_INLINE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject<ObjectArray<ArtField> >(ResolvedFieldsOffset());
+    return GetFieldObject<ObjectArray<ArtField>>(ResolvedFieldsOffset());
   }
 
   const DexFile* GetDexFile() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -153,10 +153,10 @@
  private:
   HeapReference<Object> dex_;
   HeapReference<String> location_;
-  HeapReference<ObjectArray<ArtField> > resolved_fields_;
-  HeapReference<ObjectArray<ArtMethod> > resolved_methods_;
-  HeapReference<ObjectArray<Class> > resolved_types_;
-  HeapReference<ObjectArray<String> > strings_;
+  HeapReference<ObjectArray<ArtField>> resolved_fields_;
+  HeapReference<ObjectArray<ArtMethod>> resolved_methods_;
+  HeapReference<ObjectArray<Class>> resolved_types_;
+  HeapReference<ObjectArray<String>> strings_;
   uint64_t dex_file_;
 
   friend struct art::DexCacheOffsets;  // for verifying offset information
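
The dex_cache.h hunks are purely syntactic: C++11 parses a closing ">>" inside nested template arguments correctly, so the defensive space required by C++03 can be dropped. For example:

    #include <vector>

    std::vector<std::vector<int> > old_style;  // spelling required under C++03
    std::vector<std::vector<int>> new_style;   // equivalent and legal since C++11

    int main() { return 0; }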
diff --git a/runtime/mirror/dex_cache_test.cc b/runtime/mirror/dex_cache_test.cc
index fef1f9b..3d28dc6 100644
--- a/runtime/mirror/dex_cache_test.cc
+++ b/runtime/mirror/dex_cache_test.cc
@@ -23,7 +23,7 @@
 #include "gc/heap.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 
 namespace art {
 namespace mirror {
@@ -32,9 +32,10 @@
 
 TEST_F(DexCacheTest, Open) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<DexCache> dex_cache(soa.Self(), class_linker_->AllocDexCache(soa.Self(),
-                                                                       *java_lang_dex_file_));
-  ASSERT_TRUE(dex_cache.get() != NULL);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<DexCache> dex_cache(
+      hs.NewHandle(class_linker_->AllocDexCache(soa.Self(), *java_lang_dex_file_)));
+  ASSERT_TRUE(dex_cache.Get() != NULL);
 
   EXPECT_EQ(java_lang_dex_file_->NumStringIds(), dex_cache->NumStrings());
   EXPECT_EQ(java_lang_dex_file_->NumTypeIds(),   dex_cache->NumResolvedTypes());
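
The test migration shows the general SirtRef-to-Handle pattern used throughout this change: a StackHandleScope sized at compile time is created on the stack, NewHandle() wraps each reference, and values are read back through Get() instead of get(). Below is a toy model of that API shape only; this version merely fixes the capacity and forwards the pointer, whereas the real classes additionally root the references so a moving GC can update them.

    #include <cassert>
    #include <cstddef>
    #include <cstdio>

    template <typename T>
    class Handle {
     public:
      explicit Handle(T* obj) : obj_(obj) {}
      T* Get() const { return obj_; }   // matches the Handle<T>::Get() spelling used above
     private:
      T* obj_;
    };

    template <size_t kNumReferences>
    class StackScope {
     public:
      template <typename T>
      Handle<T> NewHandle(T* value) {
        assert(count_ < kNumReferences);  // capacity is fixed at compile time
        ++count_;
        return Handle<T>(value);
      }
     private:
      size_t count_ = 0;
    };

    int main() {
      int dex_cache = 42;
      StackScope<1> hs;
      Handle<int> handle(hs.NewHandle(&dex_cache));
      std::printf("handle.Get() -> %d\n", *handle.Get());
    }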
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index c70a08d..62ab2c1 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -34,9 +34,9 @@
 namespace art {
 namespace mirror {
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline Class* Object::GetClass() {
-  return GetFieldObject<Class, kVerifyFlags, kDoReadBarrier>(
+  return GetFieldObject<Class, kVerifyFlags, kReadBarrierOption>(
       OFFSET_OF_OBJECT_MEMBER(Object, klass_));
 }
 
@@ -105,7 +105,8 @@
 inline Object* Object::GetReadBarrierPointer() {
 #ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
   DCHECK(kUseBakerOrBrooksReadBarrier);
-  return GetFieldObject<Object, kVerifyNone, false>(OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_), false);
+  return GetFieldObject<Object, kVerifyNone, kWithoutReadBarrier>(
+      OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_));
 #else
   LOG(FATAL) << "Unreachable";
   return nullptr;
@@ -118,7 +119,7 @@
   // We don't mark the card as this occurs as part of object allocation. Not all objects have
   // backing cards, such as large objects.
   SetFieldObjectWithoutWriteBarrier<false, false, kVerifyNone>(
-      OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_), rb_ptr, false);
+      OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_), rb_ptr);
 #else
   LOG(FATAL) << "Unreachable";
 #endif
@@ -181,17 +182,17 @@
   return klass->IsAssignableFrom(GetClass<kVerifyFlags>());
 }
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsClass() {
-  Class* java_lang_Class =
-      GetClass<kVerifyFlags, kDoReadBarrier>()->template GetClass<kVerifyFlags, kDoReadBarrier>();
-  return GetClass<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis), kDoReadBarrier>() ==
-      java_lang_Class;
+  Class* java_lang_Class = GetClass<kVerifyFlags, kReadBarrierOption>()->
+      template GetClass<kVerifyFlags, kReadBarrierOption>();
+  return GetClass<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis),
+      kReadBarrierOption>() == java_lang_Class;
 }
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline Class* Object::AsClass() {
-  DCHECK((IsClass<kVerifyFlags, kDoReadBarrier>()));
+  DCHECK((IsClass<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<Class*>(this);
 }
 
@@ -208,15 +209,15 @@
   return down_cast<ObjectArray<T>*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsArrayInstance() {
-  return GetClass<kVerifyFlags, kDoReadBarrier>()->
-      template IsArrayClass<kVerifyFlags, kDoReadBarrier>();
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->
+      template IsArrayClass<kVerifyFlags, kReadBarrierOption>();
 }
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsArtField() {
-  return GetClass<kVerifyFlags, kDoReadBarrier>()->template IsArtFieldClass<kDoReadBarrier>();
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->IsArtFieldClass();
 }
 
 template<VerifyObjectFlags kVerifyFlags>
@@ -225,9 +226,9 @@
   return down_cast<ArtField*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsArtMethod() {
-  return GetClass<kVerifyFlags, kDoReadBarrier>()->template IsArtMethodClass<kDoReadBarrier>();
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->IsArtMethodClass();
 }
 
 template<VerifyObjectFlags kVerifyFlags>
@@ -247,9 +248,9 @@
   return down_cast<Reference*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline Array* Object::AsArray() {
-  DCHECK((IsArrayInstance<kVerifyFlags, kDoReadBarrier>()));
+  DCHECK((IsArrayInstance<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<Array*>(this);
 }
 
@@ -375,21 +376,24 @@
   return GetClass<kVerifyFlags>()->IsPhantomReferenceClass();
 }
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline size_t Object::SizeOf() {
   size_t result;
   constexpr auto kNewFlags = static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis);
-  if (IsArrayInstance<kVerifyFlags, kDoReadBarrier>()) {
-    result = AsArray<kNewFlags, kDoReadBarrier>()->template SizeOf<kNewFlags, kDoReadBarrier>();
-  } else if (IsClass<kNewFlags, kDoReadBarrier>()) {
-    result = AsClass<kNewFlags, kDoReadBarrier>()->template SizeOf<kNewFlags, kDoReadBarrier>();
+  if (IsArrayInstance<kVerifyFlags, kReadBarrierOption>()) {
+    result = AsArray<kNewFlags, kReadBarrierOption>()->
+        template SizeOf<kNewFlags, kReadBarrierOption>();
+  } else if (IsClass<kNewFlags, kReadBarrierOption>()) {
+    result = AsClass<kNewFlags, kReadBarrierOption>()->
+        template SizeOf<kNewFlags, kReadBarrierOption>();
   } else {
-    result = GetClass<kNewFlags, kDoReadBarrier>()->GetObjectSize();
+    result = GetClass<kNewFlags, kReadBarrierOption>()->
+        template GetObjectSize<kNewFlags, kReadBarrierOption>();
   }
   DCHECK_GE(result, sizeof(Object))
-      << " class=" << PrettyTypeOf(GetClass<kNewFlags, kDoReadBarrier>());
-  DCHECK(!(IsArtField<kNewFlags, kDoReadBarrier>())  || result == sizeof(ArtField));
-  DCHECK(!(IsArtMethod<kNewFlags, kDoReadBarrier>()) || result == sizeof(ArtMethod));
+      << " class=" << PrettyTypeOf(GetClass<kNewFlags, kReadBarrierOption>());
+  DCHECK(!(IsArtField<kNewFlags, kReadBarrierOption>())  || result == sizeof(ArtField));
+  DCHECK(!(IsArtMethod<kNewFlags, kReadBarrierOption>()) || result == sizeof(ArtMethod));
   return result;
 }
 
@@ -532,14 +536,15 @@
   return QuasiAtomic::Cas64(old_value, new_value, addr);
 }
 
-template<class T, VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier, bool kIsVolatile>
+template<class T, VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption,
+         bool kIsVolatile>
 inline T* Object::GetFieldObject(MemberOffset field_offset) {
   if (kVerifyFlags & kVerifyThis) {
     VerifyObject(this);
   }
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
   HeapReference<T>* objref_addr = reinterpret_cast<HeapReference<T>*>(raw_addr);
-  T* result = ReadBarrier::Barrier<T, kDoReadBarrier>(this, field_offset, objref_addr);
+  T* result = ReadBarrier::Barrier<T, kReadBarrierOption>(this, field_offset, objref_addr);
   if (kIsVolatile) {
     QuasiAtomic::MembarLoadLoad();  // Ensure loads don't re-order.
   }
@@ -549,9 +554,9 @@
   return result;
 }
 
-template<class T, VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<class T, VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline T* Object::GetFieldObjectVolatile(MemberOffset field_offset) {
-  return GetFieldObject<T, kVerifyFlags, kDoReadBarrier, true>(field_offset);
+  return GetFieldObject<T, kVerifyFlags, kReadBarrierOption, true>(field_offset);
 }
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags,
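
Throughout object-inl.h the bool kDoReadBarrier template parameter becomes a named ReadBarrierOption (kWithReadBarrier / kWithoutReadBarrier), so call sites such as GetFieldObject<Object, kVerifyNone, kWithoutReadBarrier>(...) document themselves and a stray true/false can no longer be passed silently in the wrong position. A minimal self-contained illustration of the same idea; ReadReference and the printf stand in for the barrier slow path and are not ART code.

    #include <cstdio>

    enum ReadBarrierOption { kWithReadBarrier, kWithoutReadBarrier };

    template <typename T, ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
    T* ReadReference(T** slot) {
      if (kReadBarrierOption == kWithReadBarrier) {
        // A real runtime would run the read-barrier slow path here.
        std::printf("read barrier executed\n");
      }
      return *slot;
    }

    int main() {
      int value = 7;
      int* slot = &value;
      int* with = ReadReference(&slot);                              // defaults to kWithReadBarrier
      int* without = ReadReference<int, kWithoutReadBarrier>(&slot); // explicit and self-documenting
      std::printf("%d %d\n", *with, *without);
    }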
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 2cd71a0..69e5a84 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -32,7 +32,7 @@
 #include "object_array-inl.h"
 #include "object_utils.h"
 #include "runtime.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 #include "throwable.h"
 #include "well_known_classes.h"
 
@@ -92,9 +92,7 @@
     heap->WriteBarrierEveryFieldOf(dest);
   }
   if (c->IsFinalizable()) {
-    SirtRef<Object> sirt_dest(self, dest);
-    heap->AddFinalizerReference(self, dest);
-    return sirt_dest.get();
+    heap->AddFinalizerReference(self, &dest);
   }
   return dest;
 }
@@ -102,19 +100,19 @@
 // An allocation pre-fence visitor that copies the object.
 class CopyObjectVisitor {
  public:
-  explicit CopyObjectVisitor(Thread* self, SirtRef<Object>* orig, size_t num_bytes)
+  explicit CopyObjectVisitor(Thread* self, Handle<Object>* orig, size_t num_bytes)
       : self_(self), orig_(orig), num_bytes_(num_bytes) {
   }
 
   void operator()(Object* obj, size_t usable_size) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     UNUSED(usable_size);
-    CopyObject(self_, obj, orig_->get(), num_bytes_);
+    CopyObject(self_, obj, orig_->Get(), num_bytes_);
   }
 
  private:
   Thread* const self_;
-  SirtRef<Object>* const orig_;
+  Handle<Object>* const orig_;
   const size_t num_bytes_;
   DISALLOW_COPY_AND_ASSIGN(CopyObjectVisitor);
 };
@@ -125,7 +123,8 @@
   // be wrong.
   gc::Heap* heap = Runtime::Current()->GetHeap();
   size_t num_bytes = SizeOf();
-  SirtRef<Object> this_object(self, this);
+  StackHandleScope<1> hs(self);
+  Handle<Object> this_object(hs.NewHandle(this));
   Object* copy;
   CopyObjectVisitor visitor(self, &this_object, num_bytes);
   if (heap->IsMovableObject(this)) {
@@ -140,10 +139,10 @@
   static AtomicInteger seed(987654321 + std::time(nullptr));
   int32_t expected_value, new_value;
   do {
-    expected_value = static_cast<uint32_t>(seed.Load());
+    expected_value = static_cast<uint32_t>(seed.LoadRelaxed());
     new_value = expected_value * 1103515245 + 12345;
   } while ((expected_value & LockWord::kHashMask) == 0 ||
-      !seed.CompareAndSwap(expected_value, new_value));
+      !seed.CompareExchangeWeakRelaxed(expected_value, new_value));
   return expected_value & LockWord::kHashMask;
 }
 
@@ -165,10 +164,11 @@
       case LockWord::kThinLocked: {
         // Inflate the thin lock to a monitor and stick the hash code inside of the monitor.
         Thread* self = Thread::Current();
-        SirtRef<mirror::Object> sirt_this(self, current_this);
-        Monitor::InflateThinLocked(self, sirt_this, lw, GenerateIdentityHashCode());
+        StackHandleScope<1> hs(self);
+        Handle<mirror::Object> h_this(hs.NewHandle(current_this));
+        Monitor::InflateThinLocked(self, h_this, lw, GenerateIdentityHashCode());
         // A GC may have occurred when we switched to kBlocked.
-        current_this = sirt_this.get();
+        current_this = h_this.Get();
         break;
       }
       case LockWord::kFatLocked: {
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index cf28b18..442909d 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -67,7 +67,8 @@
     return OFFSET_OF_OBJECT_MEMBER(Object, klass_);
   }
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Class* GetClass() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -86,7 +87,8 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool InstanceOf(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   size_t SizeOf() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   Object* Clone(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -113,9 +115,11 @@
   void Wait(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void Wait(Thread* self, int64_t timeout, int32_t nanos) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Class* AsClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -123,9 +127,11 @@
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ObjectArray<T>* AsObjectArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsArrayInstance() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Array* AsArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -158,12 +164,14 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   Throwable* AsThrowable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsArtMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ArtMethod* AsArtMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsArtField() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ArtField* AsArtField() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -185,12 +193,12 @@
 
   // Accessor for Java type fields.
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
-      bool kDoReadBarrier = true, bool kIsVolatile = false>
+      ReadBarrierOption kReadBarrierOption = kWithReadBarrier, bool kIsVolatile = false>
   T* GetFieldObject(MemberOffset field_offset) ALWAYS_INLINE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
-      bool kDoReadBarrier = true>
+      ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   T* GetFieldObjectVolatile(MemberOffset field_offset) ALWAYS_INLINE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index 203a6b2..9b6e901 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -23,7 +23,7 @@
 #include "mirror/art_field.h"
 #include "mirror/class.h"
 #include "runtime.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 #include "thread.h"
 #include <string>
 
@@ -118,7 +118,7 @@
                                               int32_t src_pos, int32_t count) {
   if (kIsDebugBuild) {
     for (int i = 0; i < count; ++i) {
-      // The Get will perform the VerifyObject.
+      // The get will perform the VerifyObject.
       src->GetWithoutChecks(src_pos + i);
     }
   }
@@ -150,7 +150,7 @@
   Runtime::Current()->GetHeap()->WriteBarrierArray(this, dst_pos, count);
   if (kIsDebugBuild) {
     for (int i = 0; i < count; ++i) {
-      // The Get will perform the VerifyObject.
+      // The get will perform the VerifyObject.
       GetWithoutChecks(dst_pos + i);
     }
   }
@@ -161,7 +161,7 @@
                                              int32_t src_pos, int32_t count) {
   if (kIsDebugBuild) {
     for (int i = 0; i < count; ++i) {
-      // The Get will perform the VerifyObject.
+      // The get will perform the VerifyObject.
       src->GetWithoutChecks(src_pos + i);
     }
   }
@@ -182,7 +182,7 @@
   Runtime::Current()->GetHeap()->WriteBarrierArray(this, dst_pos, count);
   if (kIsDebugBuild) {
     for (int i = 0; i < count; ++i) {
-      // The Get will perform the VerifyObject.
+      // The get will perform the VerifyObject.
       GetWithoutChecks(dst_pos + i);
     }
   }
@@ -244,13 +244,14 @@
 inline ObjectArray<T>* ObjectArray<T>::CopyOf(Thread* self, int32_t new_length) {
   DCHECK_GE(new_length, 0);
   // We may get copied by a compacting GC.
-  SirtRef<ObjectArray<T> > sirt_this(self, this);
+  StackHandleScope<1> hs(self);
+  Handle<ObjectArray<T>> h_this(hs.NewHandle(this));
   gc::Heap* heap = Runtime::Current()->GetHeap();
   gc::AllocatorType allocator_type = heap->IsMovableObject(this) ? heap->GetCurrentAllocator() :
       heap->GetCurrentNonMovingAllocator();
   ObjectArray<T>* new_array = Alloc(self, GetClass(), new_length, allocator_type);
   if (LIKELY(new_array != nullptr)) {
-    new_array->AssignableMemcpy(0, sirt_this.get(), 0, std::min(sirt_this->GetLength(), new_length));
+    new_array->AssignableMemcpy(0, h_this.Get(), 0, std::min(h_this->GetLength(), new_length));
   }
   return new_array;
 }
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index c494f13..18e50ce 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -18,6 +18,7 @@
 
 #include <stdint.h>
 #include <stdio.h>
+#include <memory>
 
 #include "array-inl.h"
 #include "art_field-inl.h"
@@ -34,9 +35,8 @@
 #include "art_method-inl.h"
 #include "object-inl.h"
 #include "object_array-inl.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 #include "string-inl.h"
-#include "UniquePtr.h"
 
 namespace art {
 namespace mirror {
@@ -48,7 +48,7 @@
                     const char* utf16_expected_le,
                     int32_t expected_hash)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    UniquePtr<uint16_t[]> utf16_expected(new uint16_t[expected_utf16_length]);
+    std::unique_ptr<uint16_t[]> utf16_expected(new uint16_t[expected_utf16_length]);
     for (int32_t i = 0; i < expected_utf16_length; i++) {
       uint16_t ch = (((utf16_expected_le[i*2 + 0] & 0xff) << 8) |
                      ((utf16_expected_le[i*2 + 1] & 0xff) << 0));
@@ -56,7 +56,9 @@
     }
 
     Thread* self = Thread::Current();
-    SirtRef<String> string(self, String::AllocFromModifiedUtf8(self, expected_utf16_length, utf8_in));
+    StackHandleScope<1> hs(self);
+    Handle<String> string(
+        hs.NewHandle(String::AllocFromModifiedUtf8(self, expected_utf16_length, utf8_in)));
     ASSERT_EQ(expected_utf16_length, string->GetLength());
     ASSERT_TRUE(string->GetCharArray() != NULL);
     ASSERT_TRUE(string->GetCharArray()->GetData() != NULL);
@@ -102,8 +104,9 @@
 
 TEST_F(ObjectTest, Clone) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<ObjectArray<Object> > a1(soa.Self(),
-                                   class_linker_->AllocObjectArray<Object>(soa.Self(), 256));
+  StackHandleScope<2> hs(soa.Self());
+  Handle<ObjectArray<Object>> a1(
+      hs.NewHandle(class_linker_->AllocObjectArray<Object>(soa.Self(), 256)));
   size_t s1 = a1->SizeOf();
   Object* clone = a1->Clone(soa.Self());
   EXPECT_EQ(s1, clone->SizeOf());
@@ -112,17 +115,18 @@
 
 TEST_F(ObjectTest, AllocObjectArray) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<ObjectArray<Object> > oa(soa.Self(),
-                                   class_linker_->AllocObjectArray<Object>(soa.Self(), 2));
+  StackHandleScope<2> hs(soa.Self());
+  Handle<ObjectArray<Object>> oa(
+      hs.NewHandle(class_linker_->AllocObjectArray<Object>(soa.Self(), 2)));
   EXPECT_EQ(2, oa->GetLength());
   EXPECT_TRUE(oa->Get(0) == NULL);
   EXPECT_TRUE(oa->Get(1) == NULL);
-  oa->Set<false>(0, oa.get());
-  EXPECT_TRUE(oa->Get(0) == oa.get());
+  oa->Set<false>(0, oa.Get());
+  EXPECT_TRUE(oa->Get(0) == oa.Get());
   EXPECT_TRUE(oa->Get(1) == NULL);
-  oa->Set<false>(1, oa.get());
-  EXPECT_TRUE(oa->Get(0) == oa.get());
-  EXPECT_TRUE(oa->Get(1) == oa.get());
+  oa->Set<false>(1, oa.Get());
+  EXPECT_TRUE(oa->Get(0) == oa.Get());
+  EXPECT_TRUE(oa->Get(1) == oa.Get());
 
   Class* aioobe = class_linker_->FindSystemClass(soa.Self(),
                                                  "Ljava/lang/ArrayIndexOutOfBoundsException;");
@@ -138,31 +142,33 @@
   soa.Self()->ClearException();
 
   ASSERT_TRUE(oa->GetClass() != NULL);
-  ClassHelper oa_ch(oa->GetClass());
-  ASSERT_EQ(2U, oa_ch.NumDirectInterfaces());
+  Handle<mirror::Class> klass(hs.NewHandle(oa->GetClass()));
+  ASSERT_EQ(2U, klass->NumDirectInterfaces());
   EXPECT_EQ(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Cloneable;"),
-            oa_ch.GetDirectInterface(0));
+            mirror::Class::GetDirectInterface(soa.Self(), klass, 0));
   EXPECT_EQ(class_linker_->FindSystemClass(soa.Self(), "Ljava/io/Serializable;"),
-            oa_ch.GetDirectInterface(1));
+            mirror::Class::GetDirectInterface(soa.Self(), klass, 1));
 }
 
 TEST_F(ObjectTest, AllocArray) {
   ScopedObjectAccess soa(Thread::Current());
   Class* c = class_linker_->FindSystemClass(soa.Self(), "[I");
-  SirtRef<Array> a(soa.Self(), Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(),
-                                                  Runtime::Current()->GetHeap()->GetCurrentAllocator()));
+  StackHandleScope<1> hs(soa.Self());
+  Handle<Array> a(
+      hs.NewHandle(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(),
+                                      Runtime::Current()->GetHeap()->GetCurrentAllocator())));
   EXPECT_TRUE(c == a->GetClass());
   EXPECT_EQ(1, a->GetLength());
 
   c = class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;");
-  a.reset(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(),
-                             Runtime::Current()->GetHeap()->GetCurrentAllocator()));
+  a.Assign(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(),
+                              Runtime::Current()->GetHeap()->GetCurrentAllocator()));
   EXPECT_TRUE(c == a->GetClass());
   EXPECT_EQ(1, a->GetLength());
 
   c = class_linker_->FindSystemClass(soa.Self(), "[[Ljava/lang/Object;");
-  a.reset(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(),
-                             Runtime::Current()->GetHeap()->GetCurrentAllocator()));
+  a.Assign(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(),
+                              Runtime::Current()->GetHeap()->GetCurrentAllocator()));
   EXPECT_TRUE(c == a->GetClass());
   EXPECT_EQ(1, a->GetLength());
 }
@@ -170,28 +176,27 @@
 TEST_F(ObjectTest, AllocArray_FillUsable) {
   ScopedObjectAccess soa(Thread::Current());
   Class* c = class_linker_->FindSystemClass(soa.Self(), "[B");
-  SirtRef<Array> a(soa.Self(), Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(),
-                                                  Runtime::Current()->GetHeap()->GetCurrentAllocator(),
-                                                  true));
+  StackHandleScope<1> hs(soa.Self());
+  Handle<Array> a(
+      hs.NewHandle(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(),
+                                      Runtime::Current()->GetHeap()->GetCurrentAllocator(), true)));
   EXPECT_TRUE(c == a->GetClass());
   EXPECT_LE(1, a->GetLength());
 
   c = class_linker_->FindSystemClass(soa.Self(), "[I");
-  a.reset(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(),
-                             Runtime::Current()->GetHeap()->GetCurrentAllocator(),
-                             true));
+  a.Assign(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(),
+                              Runtime::Current()->GetHeap()->GetCurrentAllocator(), true));
   EXPECT_TRUE(c == a->GetClass());
   EXPECT_LE(2, a->GetLength());
 
   c = class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;");
-  a.reset(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(),
-                             Runtime::Current()->GetHeap()->GetCurrentAllocator(),
-                             true));
+  a.Assign(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(),
+                              Runtime::Current()->GetHeap()->GetCurrentAllocator(), true));
   EXPECT_TRUE(c == a->GetClass());
   EXPECT_LE(2, a->GetLength());
 
   c = class_linker_->FindSystemClass(soa.Self(), "[[Ljava/lang/Object;");
-  a.reset(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(),
+  a.Assign(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(),
                              Runtime::Current()->GetHeap()->GetCurrentAllocator(), true));
   EXPECT_TRUE(c == a->GetClass());
   EXPECT_LE(2, a->GetLength());
@@ -273,8 +278,9 @@
 TEST_F(ObjectTest, CreateMultiArray) {
   ScopedObjectAccess soa(Thread::Current());
 
-  SirtRef<Class> c(soa.Self(), class_linker_->FindSystemClass(soa.Self(), "I"));
-  SirtRef<IntArray> dims(soa.Self(), IntArray::Alloc(soa.Self(), 1));
+  StackHandleScope<2> hs(soa.Self());
+  Handle<Class> c(hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "I")));
+  Handle<IntArray> dims(hs.NewHandle(IntArray::Alloc(soa.Self(), 1)));
   dims->Set<false>(0, 1);
   Array* multi = Array::CreateMultiArray(soa.Self(), c, dims);
   EXPECT_TRUE(multi->GetClass() == class_linker_->FindSystemClass(soa.Self(), "[I"));
@@ -287,7 +293,7 @@
             "java.lang.NegativeArraySizeException");
   soa.Self()->ClearException();
 
-  dims.reset(IntArray::Alloc(soa.Self(), 2));
+  dims.Assign(IntArray::Alloc(soa.Self(), 2));
   for (int i = 1; i < 20; ++i) {
     for (int j = 0; j < 20; ++j) {
       dims->Set<false>(0, i);
@@ -311,7 +317,8 @@
   const DexFile* dex_file = Runtime::Current()->GetCompileTimeClassPath(class_loader)[0];
   CHECK(dex_file != NULL);
 
-  SirtRef<mirror::ClassLoader> loader(soa.Self(), soa.Decode<ClassLoader*>(class_loader));
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::ClassLoader> loader(hs.NewHandle(soa.Decode<ClassLoader*>(class_loader)));
   Class* klass = class_linker_->FindClass(soa.Self(), "LStaticsFromCode;", loader);
   ArtMethod* clinit = klass->FindClassInitializer();
   const DexFile::StringId* klass_string_id = dex_file->FindStringId("LStaticsFromCode;");
@@ -339,9 +346,9 @@
   Object* s0 = field->GetObj(klass);
   EXPECT_TRUE(s0 != NULL);
 
-  SirtRef<CharArray> char_array(soa.Self(), CharArray::Alloc(soa.Self(), 0));
-  field->SetObj<false>(field->GetDeclaringClass(), char_array.get());
-  EXPECT_EQ(char_array.get(), field->GetObj(klass));
+  Handle<CharArray> char_array(hs.NewHandle(CharArray::Alloc(soa.Self(), 0)));
+  field->SetObj<false>(field->GetDeclaringClass(), char_array.Get());
+  EXPECT_EQ(char_array.Get(), field->GetObj(klass));
 
   field->SetObj<false>(field->GetDeclaringClass(), NULL);
   EXPECT_EQ(NULL, field->GetObj(klass));
@@ -375,7 +382,8 @@
 
 TEST_F(ObjectTest, StringEqualsUtf8) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<String> string(soa.Self(), String::AllocFromModifiedUtf8(soa.Self(), "android"));
+  StackHandleScope<2> hs(soa.Self());
+  Handle<String> string(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "android")));
   EXPECT_TRUE(string->Equals("android"));
   EXPECT_FALSE(string->Equals("Android"));
   EXPECT_FALSE(string->Equals("ANDROID"));
@@ -383,46 +391,49 @@
   EXPECT_FALSE(string->Equals("and"));
   EXPECT_FALSE(string->Equals("androids"));
 
-  SirtRef<String> empty(soa.Self(), String::AllocFromModifiedUtf8(soa.Self(), ""));
+  Handle<String> empty(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "")));
   EXPECT_TRUE(empty->Equals(""));
   EXPECT_FALSE(empty->Equals("a"));
 }
 
 TEST_F(ObjectTest, StringEquals) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<String> string(soa.Self(), String::AllocFromModifiedUtf8(soa.Self(), "android"));
-  SirtRef<String> string_2(soa.Self(), String::AllocFromModifiedUtf8(soa.Self(), "android"));
-  EXPECT_TRUE(string->Equals(string_2.get()));
+  StackHandleScope<3> hs(soa.Self());
+  Handle<String> string(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "android")));
+  Handle<String> string_2(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "android")));
+  EXPECT_TRUE(string->Equals(string_2.Get()));
   EXPECT_FALSE(string->Equals("Android"));
   EXPECT_FALSE(string->Equals("ANDROID"));
   EXPECT_FALSE(string->Equals(""));
   EXPECT_FALSE(string->Equals("and"));
   EXPECT_FALSE(string->Equals("androids"));
 
-  SirtRef<String> empty(soa.Self(), String::AllocFromModifiedUtf8(soa.Self(), ""));
+  Handle<String> empty(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "")));
   EXPECT_TRUE(empty->Equals(""));
   EXPECT_FALSE(empty->Equals("a"));
 }
 
 TEST_F(ObjectTest, StringCompareTo) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<String> string(soa.Self(), String::AllocFromModifiedUtf8(soa.Self(), "android"));
-  SirtRef<String> string_2(soa.Self(), String::AllocFromModifiedUtf8(soa.Self(), "android"));
-  SirtRef<String> string_3(soa.Self(), String::AllocFromModifiedUtf8(soa.Self(), "Android"));
-  SirtRef<String> string_4(soa.Self(), String::AllocFromModifiedUtf8(soa.Self(), "and"));
-  SirtRef<String> string_5(soa.Self(), String::AllocFromModifiedUtf8(soa.Self(), ""));
-  EXPECT_EQ(0, string->CompareTo(string_2.get()));
-  EXPECT_LT(0, string->CompareTo(string_3.get()));
-  EXPECT_GT(0, string_3->CompareTo(string.get()));
-  EXPECT_LT(0, string->CompareTo(string_4.get()));
-  EXPECT_GT(0, string_4->CompareTo(string.get()));
-  EXPECT_LT(0, string->CompareTo(string_5.get()));
-  EXPECT_GT(0, string_5->CompareTo(string.get()));
+  StackHandleScope<5> hs(soa.Self());
+  Handle<String> string(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "android")));
+  Handle<String> string_2(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "android")));
+  Handle<String> string_3(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "Android")));
+  Handle<String> string_4(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "and")));
+  Handle<String> string_5(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "")));
+  EXPECT_EQ(0, string->CompareTo(string_2.Get()));
+  EXPECT_LT(0, string->CompareTo(string_3.Get()));
+  EXPECT_GT(0, string_3->CompareTo(string.Get()));
+  EXPECT_LT(0, string->CompareTo(string_4.Get()));
+  EXPECT_GT(0, string_4->CompareTo(string.Get()));
+  EXPECT_LT(0, string->CompareTo(string_5.Get()));
+  EXPECT_GT(0, string_5->CompareTo(string.Get()));
 }
 
 TEST_F(ObjectTest, StringLength) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<String> string(soa.Self(), String::AllocFromModifiedUtf8(soa.Self(), "android"));
+  StackHandleScope<1> hs(soa.Self());
+  Handle<String> string(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "android")));
   EXPECT_EQ(string->GetLength(), 7);
   EXPECT_EQ(string->GetUtfLength(), 7);
 
@@ -440,8 +451,9 @@
 
   jobject jclass_loader_1 = LoadDex("ProtoCompare");
   jobject jclass_loader_2 = LoadDex("ProtoCompare2");
-  SirtRef<ClassLoader> class_loader_1(soa.Self(), soa.Decode<ClassLoader*>(jclass_loader_1));
-  SirtRef<ClassLoader> class_loader_2(soa.Self(), soa.Decode<ClassLoader*>(jclass_loader_2));
+  StackHandleScope<2> hs(soa.Self());
+  Handle<ClassLoader> class_loader_1(hs.NewHandle(soa.Decode<ClassLoader*>(jclass_loader_1)));
+  Handle<ClassLoader> class_loader_2(hs.NewHandle(soa.Decode<ClassLoader*>(jclass_loader_2)));
 
   Class* klass1 = linker->FindClass(soa.Self(), "LProtoCompare;", class_loader_1);
   ASSERT_TRUE(klass1 != NULL);
@@ -497,9 +509,10 @@
 
 TEST_F(ObjectTest, StringHashCode) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<String> empty(soa.Self(), String::AllocFromModifiedUtf8(soa.Self(), ""));
-  SirtRef<String> A(soa.Self(), String::AllocFromModifiedUtf8(soa.Self(), "A"));
-  SirtRef<String> ABC(soa.Self(), String::AllocFromModifiedUtf8(soa.Self(), "ABC"));
+  StackHandleScope<3> hs(soa.Self());
+  Handle<String> empty(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "")));
+  Handle<String> A(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "A")));
+  Handle<String> ABC(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "ABC")));
 
   EXPECT_EQ(0, empty->GetHashCode());
   EXPECT_EQ(65, A->GetHashCode());
@@ -509,17 +522,18 @@
 TEST_F(ObjectTest, InstanceOf) {
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("XandY");
-  SirtRef<ClassLoader> class_loader(soa.Self(), soa.Decode<ClassLoader*>(jclass_loader));
+  StackHandleScope<3> hs(soa.Self());
+  Handle<ClassLoader> class_loader(hs.NewHandle(soa.Decode<ClassLoader*>(jclass_loader)));
 
   Class* X = class_linker_->FindClass(soa.Self(), "LX;", class_loader);
   Class* Y = class_linker_->FindClass(soa.Self(), "LY;", class_loader);
   ASSERT_TRUE(X != NULL);
   ASSERT_TRUE(Y != NULL);
 
-  SirtRef<Object> x(soa.Self(), X->AllocObject(soa.Self()));
-  SirtRef<Object> y(soa.Self(), Y->AllocObject(soa.Self()));
-  ASSERT_TRUE(x.get() != NULL);
-  ASSERT_TRUE(y.get() != NULL);
+  Handle<Object> x(hs.NewHandle(X->AllocObject(soa.Self())));
+  Handle<Object> y(hs.NewHandle(Y->AllocObject(soa.Self())));
+  ASSERT_TRUE(x.Get() != NULL);
+  ASSERT_TRUE(y.Get() != NULL);
 
   EXPECT_TRUE(x->InstanceOf(X));
   EXPECT_FALSE(x->InstanceOf(Y));
@@ -543,7 +557,8 @@
 TEST_F(ObjectTest, IsAssignableFrom) {
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("XandY");
-  SirtRef<ClassLoader> class_loader(soa.Self(), soa.Decode<ClassLoader*>(jclass_loader));
+  StackHandleScope<1> hs(soa.Self());
+  Handle<ClassLoader> class_loader(hs.NewHandle(soa.Decode<ClassLoader*>(jclass_loader)));
   Class* X = class_linker_->FindClass(soa.Self(), "LX;", class_loader);
   Class* Y = class_linker_->FindClass(soa.Self(), "LY;", class_loader);
 
@@ -580,7 +595,8 @@
 TEST_F(ObjectTest, IsAssignableFromArray) {
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("XandY");
-  SirtRef<ClassLoader> class_loader(soa.Self(), soa.Decode<ClassLoader*>(jclass_loader));
+  StackHandleScope<1> hs(soa.Self());
+  Handle<ClassLoader> class_loader(hs.NewHandle(soa.Decode<ClassLoader*>(jclass_loader)));
   Class* X = class_linker_->FindClass(soa.Self(), "LX;", class_loader);
   Class* Y = class_linker_->FindClass(soa.Self(), "LY;", class_loader);
   ASSERT_TRUE(X != NULL);
@@ -632,8 +648,9 @@
 
 TEST_F(ObjectTest, FindInstanceField) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<String> s(soa.Self(), String::AllocFromModifiedUtf8(soa.Self(), "ABC"));
-  ASSERT_TRUE(s.get() != NULL);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<String> s(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "ABC")));
+  ASSERT_TRUE(s.Get() != NULL);
   Class* c = s->GetClass();
   ASSERT_TRUE(c != NULL);
 
@@ -665,25 +682,31 @@
 
 TEST_F(ObjectTest, FindStaticField) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<String> s(soa.Self(), String::AllocFromModifiedUtf8(soa.Self(), "ABC"));
-  ASSERT_TRUE(s.get() != NULL);
-  Class* c = s->GetClass();
-  ASSERT_TRUE(c != NULL);
+  StackHandleScope<4> hs(soa.Self());
+  Handle<String> s(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "ABC")));
+  ASSERT_TRUE(s.Get() != NULL);
+  Handle<Class> c(hs.NewHandle(s->GetClass()));
+  ASSERT_TRUE(c.Get() != NULL);
 
   // Wrong type.
   EXPECT_TRUE(c->FindDeclaredStaticField("CASE_INSENSITIVE_ORDER", "I") == NULL);
-  EXPECT_TRUE(c->FindStaticField("CASE_INSENSITIVE_ORDER", "I") == NULL);
+  EXPECT_TRUE(mirror::Class::FindStaticField(soa.Self(), c, "CASE_INSENSITIVE_ORDER", "I") == NULL);
 
   // Wrong name.
   EXPECT_TRUE(c->FindDeclaredStaticField("cASE_INSENSITIVE_ORDER", "Ljava/util/Comparator;") == NULL);
-  EXPECT_TRUE(c->FindStaticField("cASE_INSENSITIVE_ORDER", "Ljava/util/Comparator;") == NULL);
+  EXPECT_TRUE(
+      mirror::Class::FindStaticField(soa.Self(), c, "cASE_INSENSITIVE_ORDER",
+                                     "Ljava/util/Comparator;") == NULL);
 
   // Right name and type.
-  ArtField* f1 = c->FindDeclaredStaticField("CASE_INSENSITIVE_ORDER", "Ljava/util/Comparator;");
-  ArtField* f2 = c->FindStaticField("CASE_INSENSITIVE_ORDER", "Ljava/util/Comparator;");
-  EXPECT_TRUE(f1 != NULL);
-  EXPECT_TRUE(f2 != NULL);
-  EXPECT_EQ(f1, f2);
+  Handle<ArtField> f1(hs.NewHandle(
+      c->FindDeclaredStaticField("CASE_INSENSITIVE_ORDER", "Ljava/util/Comparator;")));
+  Handle<ArtField> f2(hs.NewHandle(
+      mirror::Class::FindStaticField(soa.Self(), c, "CASE_INSENSITIVE_ORDER",
+                                     "Ljava/util/Comparator;")));
+  EXPECT_TRUE(f1.Get() != NULL);
+  EXPECT_TRUE(f2.Get() != NULL);
+  EXPECT_EQ(f1.Get(), f2.Get());
 
   // TODO: test static fields via superclasses.
   // TODO: test static fields via interfaces.
diff --git a/runtime/mirror/proxy.h b/runtime/mirror/proxy.h
index 5f9cceb..6e4947e 100644
--- a/runtime/mirror/proxy.h
+++ b/runtime/mirror/proxy.h
@@ -30,18 +30,18 @@
 class MANAGED SynthesizedProxyClass : public Class {
  public:
   ObjectArray<Class>* GetInterfaces() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject<ObjectArray<Class> >(OFFSET_OF_OBJECT_MEMBER(SynthesizedProxyClass,
+    return GetFieldObject<ObjectArray<Class>>(OFFSET_OF_OBJECT_MEMBER(SynthesizedProxyClass,
                                                                        interfaces_));
   }
 
-  ObjectArray<ObjectArray<Class> >* GetThrows()  SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject<ObjectArray<ObjectArray<Class> > >(OFFSET_OF_OBJECT_MEMBER(SynthesizedProxyClass,
+  ObjectArray<ObjectArray<Class>>* GetThrows()  SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<ObjectArray<ObjectArray<Class>>>(OFFSET_OF_OBJECT_MEMBER(SynthesizedProxyClass,
                                                                                      throws_));
   }
 
  private:
-  HeapReference<ObjectArray<Class> > interfaces_;
-  HeapReference<ObjectArray<ObjectArray<Class> > > throws_;
+  HeapReference<ObjectArray<Class>> interfaces_;
+  HeapReference<ObjectArray<ObjectArray<Class>>> throws_;
   DISALLOW_IMPLICIT_CONSTRUCTORS(SynthesizedProxyClass);
 };
 
diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h
index cf65d20..0b6e759 100644
--- a/runtime/mirror/reference.h
+++ b/runtime/mirror/reference.h
@@ -42,8 +42,10 @@
     return OFFSET_OF_OBJECT_MEMBER(Reference, referent_);
   }
 
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Object* GetReferent() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObjectVolatile<Object>(ReferentOffset());
+    return GetFieldObjectVolatile<Object, kDefaultVerifyFlags, kReadBarrierOption>(
+        ReferentOffset());
   }
   template<bool kTransactionActive>
   void SetReferent(Object* referent) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
diff --git a/runtime/mirror/stack_trace_element.cc b/runtime/mirror/stack_trace_element.cc
index f220039..b1de2b6 100644
--- a/runtime/mirror/stack_trace_element.cc
+++ b/runtime/mirror/stack_trace_element.cc
@@ -20,7 +20,7 @@
 #include "class-inl.h"
 #include "gc/accounting/card_table-inl.h"
 #include "object-inl.h"
-#include "sirt_ref-inl.h"
+#include "handle_scope-inl.h"
 #include "string.h"
 
 namespace art {
@@ -39,10 +39,8 @@
   java_lang_StackTraceElement_ = NULL;
 }
 
-StackTraceElement* StackTraceElement::Alloc(Thread* self,
-                                            SirtRef<String>& declaring_class,
-                                            SirtRef<String>& method_name,
-                                            SirtRef<String>& file_name,
+StackTraceElement* StackTraceElement::Alloc(Thread* self, Handle<String> declaring_class,
+                                            Handle<String> method_name, Handle<String> file_name,
                                             int32_t line_number) {
   StackTraceElement* trace =
       down_cast<StackTraceElement*>(GetStackTraceElement()->AllocObject(self));
@@ -57,14 +55,14 @@
 }
 
 template<bool kTransactionActive>
-void StackTraceElement::Init(SirtRef<String>& declaring_class, SirtRef<String>& method_name,
-                             SirtRef<String>& file_name, int32_t line_number) {
+void StackTraceElement::Init(Handle<String> declaring_class, Handle<String> method_name,
+                             Handle<String> file_name, int32_t line_number) {
   SetFieldObject<kTransactionActive>(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, declaring_class_),
-                                     declaring_class.get());
+                                     declaring_class.Get());
   SetFieldObject<kTransactionActive>(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, method_name_),
-                                     method_name.get());
+                                     method_name.Get());
   SetFieldObject<kTransactionActive>(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, file_name_),
-                                     file_name.get());
+                                     file_name.Get());
   SetField32<kTransactionActive>(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, line_number_),
                                  line_number);
 }
diff --git a/runtime/mirror/stack_trace_element.h b/runtime/mirror/stack_trace_element.h
index 1acbbb0..e094e8b 100644
--- a/runtime/mirror/stack_trace_element.h
+++ b/runtime/mirror/stack_trace_element.h
@@ -22,7 +22,7 @@
 
 namespace art {
 
-template<class T> class SirtRef;
+template<class T> class Handle;
 struct StackTraceElementOffsets;
 
 namespace mirror {
@@ -46,10 +46,8 @@
     return GetField32(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, line_number_));
   }
 
-  static StackTraceElement* Alloc(Thread* self,
-                                  SirtRef<String>& declaring_class,
-                                  SirtRef<String>& method_name,
-                                  SirtRef<String>& file_name,
+  static StackTraceElement* Alloc(Thread* self, Handle<String> declaring_class,
+                                  Handle<String> method_name, Handle<String> file_name,
                                   int32_t line_number)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -57,6 +55,10 @@
   static void ResetClass();
   static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static Class* GetStackTraceElement() {
+    DCHECK(java_lang_StackTraceElement_ != NULL);
+    return java_lang_StackTraceElement_;
+  }
 
  private:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
@@ -66,15 +68,10 @@
   int32_t line_number_;
 
   template<bool kTransactionActive>
-  void Init(SirtRef<String>& declaring_class, SirtRef<String>& method_name,
-            SirtRef<String>& file_name, int32_t line_number)
+  void Init(Handle<String> declaring_class, Handle<String> method_name, Handle<String> file_name,
+            int32_t line_number)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static Class* GetStackTraceElement() {
-    DCHECK(java_lang_StackTraceElement_ != NULL);
-    return java_lang_StackTraceElement_;
-  }
-
   static Class* java_lang_StackTraceElement_;
 
   friend struct art::StackTraceElementOffsets;  // for verifying offset information
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 6a0c225..1d79106 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -22,7 +22,7 @@
 #include "intern_table.h"
 #include "object-inl.h"
 #include "runtime.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 #include "thread.h"
 #include "utf-inl.h"
 
@@ -123,18 +123,19 @@
 }
 
 String* String::Alloc(Thread* self, int32_t utf16_length) {
-  SirtRef<CharArray> array(self, CharArray::Alloc(self, utf16_length));
-  if (UNLIKELY(array.get() == nullptr)) {
+  StackHandleScope<1> hs(self);
+  Handle<CharArray> array(hs.NewHandle(CharArray::Alloc(self, utf16_length)));
+  if (UNLIKELY(array.Get() == nullptr)) {
     return nullptr;
   }
   return Alloc(self, array);
 }
 
-String* String::Alloc(Thread* self, const SirtRef<CharArray>& array) {
+String* String::Alloc(Thread* self, Handle<CharArray> array) {
   // Hold reference in case AllocObject causes GC.
   String* string = down_cast<String*>(GetJavaLangString()->AllocObject(self));
   if (LIKELY(string != nullptr)) {
-    string->SetArray(array.get());
+    string->SetArray(array.Get());
     string->SetCount(array->GetLength());
   }
   return string;
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index f97308e..6c3015f 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -24,7 +24,7 @@
 
 namespace art {
 
-template<class T> class SirtRef;
+template<class T> class Handle;
 struct StringClassOffsets;
 struct StringOffsets;
 class StringPiece;
@@ -137,7 +137,7 @@
   static String* Alloc(Thread* self, int32_t utf16_length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static String* Alloc(Thread* self, const SirtRef<CharArray>& array)
+  static String* Alloc(Thread* self, Handle<CharArray> array)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetArray(CharArray* new_array) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index d393a13..6874fe5 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -24,6 +24,7 @@
 #include "object_array.h"
 #include "object_array-inl.h"
 #include "object_utils.h"
+#include "stack_trace_element.h"
 #include "utils.h"
 #include "well_known_classes.h"
 
@@ -53,6 +54,15 @@
   }
 }
 
+void Throwable::SetStackState(Object* state) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  CHECK(state != nullptr);
+  if (Runtime::Current()->IsActiveTransaction()) {
+    SetFieldObjectVolatile<true>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_state_), state);
+  } else {
+    SetFieldObjectVolatile<false>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_state_), state);
+  }
+}
+
 bool Throwable::IsCheckedException() {
   if (InstanceOf(WellKnownClasses::ToClass(WellKnownClasses::java_lang_Error))) {
     return false;
@@ -70,24 +80,49 @@
   result += "\n";
   Object* stack_state = GetStackState();
   // check stack state isn't missing or corrupt
-  if (stack_state != NULL && stack_state->IsObjectArray()) {
+  if (stack_state != nullptr && stack_state->IsObjectArray()) {
     // Decode the internal stack trace into the depth and method trace
     ObjectArray<Object>* method_trace = down_cast<ObjectArray<Object>*>(stack_state);
     int32_t depth = method_trace->GetLength() - 1;
     IntArray* pc_trace = down_cast<IntArray*>(method_trace->Get(depth));
     MethodHelper mh;
-    for (int32_t i = 0; i < depth; ++i) {
-      ArtMethod* method = down_cast<ArtMethod*>(method_trace->Get(i));
-      mh.ChangeMethod(method);
-      uint32_t dex_pc = pc_trace->Get(i);
-      int32_t line_number = mh.GetLineNumFromDexPC(dex_pc);
-      const char* source_file = mh.GetDeclaringClassSourceFile();
-      result += StringPrintf("  at %s (%s:%d)\n", PrettyMethod(method, true).c_str(),
-                             source_file, line_number);
+    if (depth == 0) {
+      result += "(Throwable with empty stack trace)";
+    } else {
+      for (int32_t i = 0; i < depth; ++i) {
+        ArtMethod* method = down_cast<ArtMethod*>(method_trace->Get(i));
+        mh.ChangeMethod(method);
+        uint32_t dex_pc = pc_trace->Get(i);
+        int32_t line_number = mh.GetLineNumFromDexPC(dex_pc);
+        const char* source_file = mh.GetDeclaringClassSourceFile();
+        result += StringPrintf("  at %s (%s:%d)\n", PrettyMethod(method, true).c_str(),
+                               source_file, line_number);
+      }
+    }
+  } else {
+    Object* stack_trace = GetStackTrace();
+    if (stack_trace != nullptr && stack_trace->IsObjectArray()) {
+      CHECK_EQ(stack_trace->GetClass()->GetComponentType(),
+               StackTraceElement::GetStackTraceElement());
+      ObjectArray<StackTraceElement>* ste_array =
+          down_cast<ObjectArray<StackTraceElement>*>(stack_trace);
+      if (ste_array->GetLength() == 0) {
+        result += "(Throwable with empty stack trace)";
+      } else {
+        for (int32_t i = 0; i < ste_array->GetLength(); ++i) {
+          StackTraceElement* ste = ste_array->Get(i);
+          result += StringPrintf("  at %s (%s:%d)\n",
+                                 ste->GetMethodName()->ToModifiedUtf8().c_str(),
+                                 ste->GetFileName()->ToModifiedUtf8().c_str(),
+                                 ste->GetLineNumber());
+        }
+      }
+    } else {
+      result += "(Throwable with no stack trace)";
     }
   }
   Throwable* cause = GetFieldObject<Throwable>(OFFSET_OF_OBJECT_MEMBER(Throwable, cause_));
-  if (cause != NULL && cause != this) {  // Constructor makes cause == this by default.
+  if (cause != nullptr && cause != this) {  // Constructor makes cause == this by default.
     result += "Caused by: ";
     result += cause->Dump();
   }
diff --git a/runtime/mirror/throwable.h b/runtime/mirror/throwable.h
index 950b5e7..c4127e0 100644
--- a/runtime/mirror/throwable.h
+++ b/runtime/mirror/throwable.h
@@ -42,6 +42,7 @@
   // overridden. Also it asserts rather than throwing exceptions. Currently this is only used
   // in cases like the verifier where the checks cannot fail and initCause isn't overridden.
   void SetCause(Throwable* cause) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetStackState(Object* state) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsCheckedException() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Class* GetJavaLangThrowable() {
@@ -58,6 +59,9 @@
   Object* GetStackState() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_state_));
   }
+  Object* GetStackTrace() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_trace_));
+  }
 
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   HeapReference<Throwable> cause_;
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 38b77d1..58e6dd4 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -99,19 +99,19 @@
 
 int32_t Monitor::GetHashCode() {
   while (!HasHashCode()) {
-    if (hash_code_.CompareAndSwap(0, mirror::Object::GenerateIdentityHashCode())) {
+    if (hash_code_.CompareExchangeWeakRelaxed(0, mirror::Object::GenerateIdentityHashCode())) {
       break;
     }
   }
   DCHECK(HasHashCode());
-  return hash_code_.Load();
+  return hash_code_.LoadRelaxed();
 }
 
 bool Monitor::Install(Thread* self) {
   MutexLock mu(self, monitor_lock_);  // Uncontended mutex acquisition as monitor isn't yet public.
   CHECK(owner_ == nullptr || owner_ == self || owner_->IsSuspended());
   // Propagate the lock state.
-  LockWord lw(obj_->GetLockWord(false));
+  LockWord lw(GetObject()->GetLockWord(false));
   switch (lw.GetState()) {
     case LockWord::kThinLocked: {
       CHECK_EQ(owner_->GetThreadId(), lw.ThinLockOwner());
@@ -119,7 +119,7 @@
       break;
     }
     case LockWord::kHashCode: {
-      CHECK_EQ(hash_code_, static_cast<int32_t>(lw.GetHashCode()));
+      CHECK_EQ(hash_code_.LoadRelaxed(), static_cast<int32_t>(lw.GetHashCode()));
       break;
     }
     case LockWord::kFatLocked: {
@@ -137,7 +137,7 @@
   }
   LockWord fat(this);
   // Publish the updated lock word, which may race with other threads.
-  bool success = obj_->CasLockWord(lw, fat);
+  bool success = GetObject()->CasLockWord(lw, fat);
   // Lock profiling.
   if (success && owner_ != nullptr && lock_profiling_threshold_ != 0) {
     locking_method_ = owner_->GetCurrentMethod(&locking_dex_pc_);
@@ -226,9 +226,9 @@
     // Do this before releasing the lock so that we don't get deflated.
     ++num_waiters_;
     monitor_lock_.Unlock(self);  // Let go of locks in order.
+    self->SetMonitorEnterObject(GetObject());
     {
       ScopedThreadStateChange tsc(self, kBlocked);  // Change to blocked and give up mutator_lock_.
-      self->SetMonitorEnterObject(obj_);
       MutexLock mu2(self, monitor_lock_);  // Reacquire monitor_lock_ without mutator_lock_ for Wait.
       if (owner_ != NULL) {  // Did the owner_ give the lock up?
         monitor_contenders_.Wait(self);  // Still contended so wait.
@@ -249,8 +249,8 @@
           }
         }
       }
-      self->SetMonitorEnterObject(nullptr);
     }
+    self->SetMonitorEnterObject(nullptr);
     monitor_lock_.Lock(self);  // Reacquire locks in order.
     --num_waiters_;
   }
@@ -363,7 +363,7 @@
     // We don't own this, so we're not allowed to unlock it.
     // The JNI spec says that we should throw IllegalMonitorStateException
     // in this case.
-    FailedUnlock(obj_, self, owner, this);
+    FailedUnlock(GetObject(), self, owner, this);
     return false;
   }
   return true;
@@ -623,40 +623,46 @@
   DCHECK(self != NULL);
   DCHECK(obj != NULL);
   // Allocate and acquire a new monitor.
-  UniquePtr<Monitor> m(new Monitor(self, owner, obj, hash_code));
+  std::unique_ptr<Monitor> m(new Monitor(self, owner, obj, hash_code));
   if (m->Install(self)) {
-    VLOG(monitor) << "monitor: thread " << owner->GetThreadId()
-                    << " created monitor " << m.get() << " for object " << obj;
+    if (owner != nullptr) {
+      VLOG(monitor) << "monitor: thread " << owner->GetThreadId()
+          << " created monitor " << m.get() << " for object " << obj;
+    } else {
+      VLOG(monitor) << "monitor: Inflate with hashcode " << hash_code
+          << " created monitor " << m.get() << " for object " << obj;
+    }
     Runtime::Current()->GetMonitorList()->Add(m.release());
     CHECK_EQ(obj->GetLockWord(true).GetState(), LockWord::kFatLocked);
   }
 }
 
-void Monitor::InflateThinLocked(Thread* self, SirtRef<mirror::Object>& obj, LockWord lock_word,
+void Monitor::InflateThinLocked(Thread* self, Handle<mirror::Object> obj, LockWord lock_word,
                                 uint32_t hash_code) {
   DCHECK_EQ(lock_word.GetState(), LockWord::kThinLocked);
   uint32_t owner_thread_id = lock_word.ThinLockOwner();
   if (owner_thread_id == self->GetThreadId()) {
     // We own the monitor, we can easily inflate it.
-    Inflate(self, self, obj.get(), hash_code);
+    Inflate(self, self, obj.Get(), hash_code);
   } else {
     ThreadList* thread_list = Runtime::Current()->GetThreadList();
     // Suspend the owner, inflate. First change to blocked and give up mutator_lock_.
-    ScopedThreadStateChange tsc(self, kBlocked);
-    self->SetMonitorEnterObject(obj.get());
-    if (lock_word == obj->GetLockWord(true)) {  // If lock word hasn't changed.
-      bool timed_out;
-      Thread* owner = thread_list->SuspendThreadByThreadId(owner_thread_id, false, &timed_out);
-      if (owner != nullptr) {
-        // We succeeded in suspending the thread, check the lock's status didn't change.
-        lock_word = obj->GetLockWord(true);
-        if (lock_word.GetState() == LockWord::kThinLocked &&
-            lock_word.ThinLockOwner() == owner_thread_id) {
-          // Go ahead and inflate the lock.
-          Inflate(self, owner, obj.get(), hash_code);
-        }
-        thread_list->Resume(owner, false);
+    self->SetMonitorEnterObject(obj.Get());
+    bool timed_out;
+    Thread* owner;
+    {
+      ScopedThreadStateChange tsc(self, kBlocked);
+      owner = thread_list->SuspendThreadByThreadId(owner_thread_id, false, &timed_out);
+    }
+    if (owner != nullptr) {
+      // We succeeded in suspending the thread, check the lock's status didn't change.
+      lock_word = obj->GetLockWord(true);
+      if (lock_word.GetState() == LockWord::kThinLocked &&
+          lock_word.ThinLockOwner() == owner_thread_id) {
+        // Go ahead and inflate the lock.
+        Inflate(self, owner, obj.Get(), hash_code);
       }
+      thread_list->Resume(owner, false);
     }
     self->SetMonitorEnterObject(nullptr);
   }
@@ -680,15 +686,16 @@
   obj = FakeLock(obj);
   uint32_t thread_id = self->GetThreadId();
   size_t contention_count = 0;
-  SirtRef<mirror::Object> sirt_obj(self, obj);
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Object> h_obj(hs.NewHandle(obj));
   while (true) {
-    LockWord lock_word = sirt_obj->GetLockWord(true);
+    LockWord lock_word = h_obj->GetLockWord(true);
     switch (lock_word.GetState()) {
       case LockWord::kUnlocked: {
         LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0));
-        if (sirt_obj->CasLockWord(lock_word, thin_locked)) {
+        if (h_obj->CasLockWord(lock_word, thin_locked)) {
           QuasiAtomic::MembarLoadLoad();
-          return sirt_obj.get();  // Success!
+          return h_obj.Get();  // Success!
         }
         continue;  // Go again.
       }
@@ -699,11 +706,11 @@
           uint32_t new_count = lock_word.ThinLockCount() + 1;
           if (LIKELY(new_count <= LockWord::kThinLockMaxCount)) {
             LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count));
-            sirt_obj->SetLockWord(thin_locked, true);
-            return sirt_obj.get();  // Success!
+            h_obj->SetLockWord(thin_locked, true);
+            return h_obj.Get();  // Success!
           } else {
             // We'd overflow the recursion count, so inflate the monitor.
-            InflateThinLocked(self, sirt_obj, lock_word, 0);
+            InflateThinLocked(self, h_obj, lock_word, 0);
           }
         } else {
           // Contention.
@@ -713,7 +720,7 @@
             NanoSleep(1000);  // Sleep for 1us and re-attempt.
           } else {
             contention_count = 0;
-            InflateThinLocked(self, sirt_obj, lock_word, 0);
+            InflateThinLocked(self, h_obj, lock_word, 0);
           }
         }
         continue;  // Start from the beginning.
@@ -721,15 +728,15 @@
       case LockWord::kFatLocked: {
         Monitor* mon = lock_word.FatLockMonitor();
         mon->Lock(self);
-        return sirt_obj.get();  // Success!
+        return h_obj.Get();  // Success!
       }
       case LockWord::kHashCode:
         // Inflate with the existing hashcode.
-        Inflate(self, nullptr, sirt_obj.get(), lock_word.GetHashCode());
+        Inflate(self, nullptr, h_obj.Get(), lock_word.GetHashCode());
         continue;  // Start from the beginning.
       default: {
         LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
-        return sirt_obj.get();
+        return h_obj.Get();
       }
     }
   }
@@ -740,12 +747,13 @@
   DCHECK(obj != NULL);
   obj = FakeUnlock(obj);
   LockWord lock_word = obj->GetLockWord(true);
-  SirtRef<mirror::Object> sirt_obj(self, obj);
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Object> h_obj(hs.NewHandle(obj));
   switch (lock_word.GetState()) {
     case LockWord::kHashCode:
       // Fall-through.
     case LockWord::kUnlocked:
-      FailedUnlock(sirt_obj.get(), self, nullptr, nullptr);
+      FailedUnlock(h_obj.Get(), self, nullptr, nullptr);
       return false;  // Failure.
     case LockWord::kThinLocked: {
       uint32_t thread_id = self->GetThreadId();
@@ -754,16 +762,16 @@
         // TODO: there's a race here with the owner dying while we unlock.
         Thread* owner =
             Runtime::Current()->GetThreadList()->FindThreadByThreadId(lock_word.ThinLockOwner());
-        FailedUnlock(sirt_obj.get(), self, owner, nullptr);
+        FailedUnlock(h_obj.Get(), self, owner, nullptr);
         return false;  // Failure.
       } else {
         // We own the lock, decrease the recursion count.
         if (lock_word.ThinLockCount() != 0) {
           uint32_t new_count = lock_word.ThinLockCount() - 1;
           LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count));
-          sirt_obj->SetLockWord(thin_locked, true);
+          h_obj->SetLockWord(thin_locked, true);
         } else {
-          sirt_obj->SetLockWord(LockWord(), true);
+          h_obj->SetLockWord(LockWord(), true);
         }
         return true;  // Success!
       }
@@ -887,7 +895,7 @@
     MutexLock mu(self, *thread->GetWaitMutex());
     Monitor* monitor = thread->GetWaitMonitor();
     if (monitor != nullptr) {
-      pretty_object = monitor->obj_;
+      pretty_object = monitor->GetObject();
     }
   } else if (state == kBlocked) {
     wait_message = "  - waiting to lock ";
@@ -946,7 +954,7 @@
   // TODO: use the JNI implementation's table of explicit MonitorEnter calls and dump those too.
   if (m->IsNative()) {
     if (m->IsSynchronized()) {
-      mirror::Object* jni_this = stack_visitor->GetCurrentSirt()->GetReference(0);
+      mirror::Object* jni_this = stack_visitor->GetCurrentHandleScope()->GetReference(0);
       callback(jni_this, callback_context);
     }
     return;
@@ -975,7 +983,7 @@
   // Ask the verifier for the dex pcs of all the monitor-enter instructions corresponding to
   // the locks held in this stack frame.
   std::vector<uint32_t> monitor_enter_dex_pcs;
-  verifier::MethodVerifier::FindLocksAtDexPc(m, stack_visitor->GetDexPc(), monitor_enter_dex_pcs);
+  verifier::MethodVerifier::FindLocksAtDexPc(m, stack_visitor->GetDexPc(), &monitor_enter_dex_pcs);
   if (monitor_enter_dex_pcs.empty()) {
     return;
   }
@@ -1093,12 +1101,13 @@
   MutexLock mu(Thread::Current(), monitor_list_lock_);
   for (auto it = list_.begin(); it != list_.end(); ) {
     Monitor* m = *it;
-    mirror::Object* obj = m->GetObject();
+    // Disable the read barrier in GetObject() as this is called by GC.
+    mirror::Object* obj = m->GetObject<kWithoutReadBarrier>();
     // The object of a monitor can be null if we have deflated it.
     mirror::Object* new_obj = obj != nullptr ? callback(obj, arg) : nullptr;
     if (new_obj == nullptr) {
       VLOG(monitor) << "freeing monitor " << m << " belonging to unmarked object "
-                    << m->GetObject();
+                    << obj;
       delete m;
       it = list_.erase(it);
     } else {
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 15620d5..9e6d255 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -27,12 +27,13 @@
 #include "atomic.h"
 #include "base/mutex.h"
 #include "object_callbacks.h"
+#include "read_barrier.h"
 #include "thread_state.h"
 
 namespace art {
 
 class LockWord;
-template<class T> class SirtRef;
+template<class T> class Handle;
 class Thread;
 class StackVisitor;
 typedef uint32_t MonitorId;
@@ -92,8 +93,9 @@
 
   static bool IsValidLockWord(LockWord lock_word);
 
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   mirror::Object* GetObject() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return obj_;
+    return ReadBarrier::BarrierForWeakRoot<mirror::Object, kReadBarrierOption>(obj_);
   }
 
   void SetObject(mirror::Object* object);
@@ -107,14 +109,14 @@
   bool IsLocked() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool HasHashCode() const {
-    return hash_code_.Load() != 0;
+    return hash_code_.LoadRelaxed() != 0;
   }
 
   MonitorId GetMonitorId() const {
     return monitor_id_;
   }
 
-  static void InflateThinLocked(Thread* self, SirtRef<mirror::Object>& obj, LockWord lock_word,
+  static void InflateThinLocked(Thread* self, Handle<mirror::Object> obj, LockWord lock_word,
                                 uint32_t hash_code) NO_THREAD_SAFETY_ANALYSIS;
 
   static bool Deflate(Thread* self, mirror::Object* obj)
@@ -190,7 +192,9 @@
   // Owner's recursive lock depth.
   int lock_count_ GUARDED_BY(monitor_lock_);
 
-  // What object are we part of.
+  // What object are we part of? This is a weak root. Do not access
+  // this directly; use GetObject() to read it so that it is guarded
+  // by a read barrier.
   mirror::Object* obj_;
 
   // Threads currently waiting on this monitor.
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index d9c1309..8f74dd7 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -15,8 +15,8 @@
  */
 
 #include <algorithm>
-#include <fcntl.h>
 #include <set>
+#include <fcntl.h>
 #include <unistd.h>
 
 #include "base/logging.h"
@@ -37,7 +37,7 @@
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
-#include "toStringArray.h"
+#include "well_known_classes.h"
 #include "zip_archive.h"
 
 #ifdef HAVE_ANDROID_OS
@@ -115,7 +115,9 @@
   if (outputName.c_str() == nullptr) {
     // FindOrCreateOatFileForDexLocation can tolerate a missing dex_location_checksum
     dex_file = linker->FindDexFileInOatFileFromDexLocation(sourceName.c_str(),
-                                                           dex_location_checksum_pointer, &error_msgs);
+                                                           dex_location_checksum_pointer,
+                                                           kRuntimeISA,
+                                                           &error_msgs);
   } else {
     // FindOrCreateOatFileForDexLocation requires the dex_location_checksum
     if (dex_location_checksum_pointer == NULL) {
@@ -186,7 +188,9 @@
   ScopedObjectAccess soa(env);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   class_linker->RegisterDexFile(*dex_file);
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(javaLoader));
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(javaLoader)));
   mirror::Class* result = class_linker->DefineClass(descriptor.c_str(), class_loader, *dex_file,
                                                     *dex_class_def);
   VLOG(class_linker) << "DexFile_defineClassNative returning " << result;
@@ -194,19 +198,24 @@
 }
 
 static jobjectArray DexFile_getClassNameList(JNIEnv* env, jclass, jlong cookie) {
-  const DexFile* dex_file;
-  dex_file = toDexFile(cookie, env);
-  if (dex_file == nullptr) {
-    return nullptr;
+  jobjectArray result = nullptr;
+  const DexFile* dex_file = toDexFile(cookie, env);
+  if (dex_file != nullptr) {
+    result = env->NewObjectArray(dex_file->NumClassDefs(), WellKnownClasses::java_lang_String,
+                                 nullptr);
+    if (result != nullptr) {
+      for (size_t i = 0; i < dex_file->NumClassDefs(); ++i) {
+        const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
+        const char* descriptor = dex_file->GetClassDescriptor(class_def);
+        ScopedLocalRef<jstring> jdescriptor(env, env->NewStringUTF(descriptor));
+        if (jdescriptor.get() == nullptr) {
+          return nullptr;
+        }
+        env->SetObjectArrayElement(result, i, jdescriptor.get());
+      }
+    }
   }
-
-  std::vector<std::string> class_names;
-  for (size_t i = 0; i < dex_file->NumClassDefs(); ++i) {
-    const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
-    const char* descriptor = dex_file->GetClassDescriptor(class_def);
-    class_names.push_back(DescriptorToDot(descriptor));
-  }
-  return toStringArray(env, class_names);
+  return result;
 }
 
 // Copy a profile file
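With toStringArray gone, the class-name list is built directly with JNI: allocate a String[], populate it element by element, release each local reference promptly, and bail out if any allocation fails. A standalone sketch of the same pattern against plain JNI (the real code resolves java.lang.String through WellKnownClasses and manages locals with ScopedLocalRef):

```cpp
#include <jni.h>
#include <cstddef>
#include <string>
#include <vector>

// Builds a java.lang.String[] from a vector; returns nullptr with a pending
// exception on any allocation failure, mirroring the early returns above.
static jobjectArray MakeStringArray(JNIEnv* env, const std::vector<std::string>& strings) {
  jclass string_class = env->FindClass("java/lang/String");
  if (string_class == nullptr) {
    return nullptr;  // ClassNotFoundException pending.
  }
  jobjectArray result =
      env->NewObjectArray(static_cast<jsize>(strings.size()), string_class, nullptr);
  if (result == nullptr) {
    return nullptr;  // OutOfMemoryError pending.
  }
  for (std::size_t i = 0; i < strings.size(); ++i) {
    jstring element = env->NewStringUTF(strings[i].c_str());
    if (element == nullptr) {
      return nullptr;  // Propagate the pending exception.
    }
    env->SetObjectArrayElement(result, static_cast<jsize>(i), element);
    env->DeleteLocalRef(element);  // Keep the local reference table small.
  }
  return result;
}
```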
@@ -290,6 +299,52 @@
     }
   }
 
+  const InstructionSet target_instruction_set = GetInstructionSetFromString(instruction_set);
+
+  // Check if we have an odex file next to the dex file.
+  std::string odex_filename(DexFilenameToOdexFilename(filename, kRuntimeISA));
+  std::string error_msg;
+  std::unique_ptr<const OatFile> oat_file(OatFile::Open(odex_filename, odex_filename, NULL, false,
+                                                        &error_msg));
+  if (oat_file.get() == nullptr) {
+    if (kVerboseLogging) {
+      LOG(INFO) << "DexFile_isDexOptNeeded failed to open oat file '" << filename
+          << "': " << error_msg;
+    }
+    error_msg.clear();
+  } else {
+    const art::OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(filename, NULL,
+                                                                           kReasonLogging);
+    if (oat_dex_file != nullptr) {
+      uint32_t location_checksum;
+      // If it's not possible to read the classes.dex, assume up-to-date as we won't be able to
+      // compile it anyway.
+      if (!DexFile::GetChecksum(filename, &location_checksum, &error_msg)) {
+        if (kVerboseLogging) {
+          LOG(INFO) << "DexFile_isDexOptNeeded ignoring precompiled stripped file: "
+              << filename << ": " << error_msg;
+        }
+        return JNI_FALSE;
+      }
+      if (ClassLinker::VerifyOatFileChecksums(oat_file.get(), filename, location_checksum,
+                                              target_instruction_set,
+                                              &error_msg)) {
+        if (kVerboseLogging) {
+          LOG(INFO) << "DexFile_isDexOptNeeded precompiled file " << odex_filename
+              << " has an up-to-date checksum compared to " << filename;
+        }
+        return JNI_FALSE;
+      } else {
+        if (kVerboseLogging) {
+          LOG(INFO) << "DexFile_isDexOptNeeded found precompiled file " << odex_filename
+              << " with an out-of-date checksum compared to " << filename
+              << ": " << error_msg;
+        }
+        error_msg.clear();
+      }
+    }
+  }
+
   // Check the profile file.  We need to rerun dex2oat if the profile has changed significantly
   // since the last time, or it's new.
   // If the 'defer' argument is true then this will be retried later.  In this case we
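The odex fast path now runs before the dalvik-cache checks and is keyed to the target instruction set (DexFilenameToOdexFilename takes an ISA and VerifyOatFileChecksums verifies against it). A condensed, self-contained view of the decision that block implements; every type and helper below is a toy stand-in, not an ART API:

```cpp
#include <cstdint>
#include <string>

struct FakeOat {
  bool valid = false;
  uint32_t recorded_checksum = 0;
};

// Stand-ins for OatFile::Open and DexFile::GetChecksum.
static FakeOat OpenOdex(const std::string& /*odex_path*/) { return FakeOat{}; }
static bool ReadDexChecksum(const std::string& /*dex_path*/, uint32_t* out) {
  *out = 0;
  return true;
}

// Returns true when the precompiled file can be used as-is, i.e. dexopt is not needed.
static bool OdexUpToDate(const std::string& dex_path, const std::string& odex_path) {
  FakeOat oat = OpenOdex(odex_path);
  if (!oat.valid) {
    return false;  // No usable odex: fall through to the cache checks.
  }
  uint32_t dex_checksum = 0;
  if (!ReadDexChecksum(dex_path, &dex_checksum)) {
    return true;   // Stripped dex: nothing left to compile, treat as up to date.
  }
  return oat.recorded_checksum == dex_checksum;
}
```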
@@ -377,49 +432,6 @@
     }
   }
 
-  // Check if we have an odex file next to the dex file.
-  std::string odex_filename(OatFile::DexFilenameToOdexFilename(filename));
-  std::string error_msg;
-  UniquePtr<const OatFile> oat_file(OatFile::Open(odex_filename, odex_filename, NULL, false,
-                                                  &error_msg));
-  if (oat_file.get() == nullptr) {
-    if (kVerboseLogging) {
-      LOG(INFO) << "DexFile_isDexOptNeeded failed to open oat file '" << filename
-          << "': " << error_msg;
-    }
-    error_msg.clear();
-  } else {
-    const art::OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(filename, NULL,
-                                                                           kReasonLogging);
-    if (oat_dex_file != nullptr) {
-      uint32_t location_checksum;
-      // If its not possible to read the classes.dex assume up-to-date as we won't be able to
-      // compile it anyway.
-      if (!DexFile::GetChecksum(filename, &location_checksum, &error_msg)) {
-        if (kVerboseLogging) {
-          LOG(INFO) << "DexFile_isDexOptNeeded ignoring precompiled stripped file: "
-              << filename << ": " << error_msg;
-        }
-        return JNI_FALSE;
-      }
-      if (ClassLinker::VerifyOatFileChecksums(oat_file.get(), filename, location_checksum,
-                                              &error_msg)) {
-        if (kVerboseLogging) {
-          LOG(INFO) << "DexFile_isDexOptNeeded precompiled file " << odex_filename
-              << " has an up-to-date checksum compared to " << filename;
-        }
-        return JNI_FALSE;
-      } else {
-        if (kVerboseLogging) {
-          LOG(INFO) << "DexFile_isDexOptNeeded found precompiled file " << odex_filename
-              << " with an out-of-date checksum compared to " << filename
-              << ": " << error_msg;
-        }
-        error_msg.clear();
-      }
-    }
-  }
-
   // Check if we have an oat file in the cache
   const std::string cache_dir(GetDalvikCacheOrDie(instruction_set));
   const std::string cache_location(
@@ -433,33 +445,6 @@
     return JNI_TRUE;
   }
 
-  for (const auto& space : runtime->GetHeap()->GetContinuousSpaces()) {
-    if (space->IsImageSpace()) {
-      // TODO: Ensure this works with multiple image spaces.
-      const ImageHeader& image_header = space->AsImageSpace()->GetImageHeader();
-      if (oat_file->GetOatHeader().GetImageFileLocationOatChecksum() !=
-          image_header.GetOatChecksum()) {
-        if (kReasonLogging) {
-          ScopedObjectAccess soa(env);
-          LOG(INFO) << "DexFile_isDexOptNeeded cache file " << cache_location
-              << " has out-of-date oat checksum compared to "
-              << oat_file->GetLocation();
-        }
-        return JNI_TRUE;
-      }
-      if (oat_file->GetOatHeader().GetImageFileLocationOatDataBegin()
-          != reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin())) {
-        if (kReasonLogging) {
-          ScopedObjectAccess soa(env);
-          LOG(INFO) << "DexFile_isDexOptNeeded cache file " << cache_location
-              << " has out-of-date oat begin compared to "
-              << oat_file->GetLocation();
-        }
-        return JNI_TRUE;
-      }
-    }
-  }
-
   uint32_t location_checksum;
   if (!DexFile::GetChecksum(filename, &location_checksum, &error_msg)) {
     if (kReasonLogging) {
@@ -470,7 +455,7 @@
   }
 
   if (!ClassLinker::VerifyOatFileChecksums(oat_file.get(), filename, location_checksum,
-                                           &error_msg)) {
+                                           target_instruction_set, &error_msg)) {
     if (kReasonLogging) {
       LOG(INFO) << "DexFile_isDexOptNeeded cache file " << cache_location
           << " has out-of-date checksum compared to " << filename
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index 0b58af4..ceff206 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -28,21 +28,35 @@
 #include "hprof/hprof.h"
 #include "jni_internal.h"
 #include "mirror/class.h"
+#include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
 #include "scoped_fast_native_object_access.h"
-#include "toStringArray.h"
 #include "trace.h"
+#include "well_known_classes.h"
 
 namespace art {
 
 static jobjectArray VMDebug_getVmFeatureList(JNIEnv* env, jclass) {
-  std::vector<std::string> features;
-  features.push_back("method-trace-profiling");
-  features.push_back("method-trace-profiling-streaming");
-  features.push_back("method-sample-profiling");
-  features.push_back("hprof-heap-dump");
-  features.push_back("hprof-heap-dump-streaming");
-  return toStringArray(env, features);
+  static const char* features[] = {
+    "method-trace-profiling",
+    "method-trace-profiling-streaming",
+    "method-sample-profiling",
+    "hprof-heap-dump",
+    "hprof-heap-dump-streaming",
+  };
+  jobjectArray result = env->NewObjectArray(arraysize(features),
+                                            WellKnownClasses::java_lang_String,
+                                            nullptr);
+  if (result != nullptr) {
+    for (size_t i = 0; i < arraysize(features); ++i) {
+      ScopedLocalRef<jstring> jfeature(env, env->NewStringUTF(features[i]));
+      if (jfeature.get() == nullptr) {
+        return nullptr;
+      }
+      env->SetObjectArrayElement(result, i, jfeature.get());
+    }
+  }
+  return result;
 }
 
 static void VMDebug_startAllocCounting(JNIEnv*, jclass) {
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 76c5866..d55b545 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -66,10 +66,6 @@
     ThrowNullPointerException(NULL, "element class == null");
     return nullptr;
   }
-  if (UNLIKELY(element_class->IsPrimitiveVoid())) {
-    ThrowIllegalArgumentException(NULL, "Can't allocate an array of void");
-    return nullptr;
-  }
   Runtime* runtime = Runtime::Current();
   mirror::Class* array_class = runtime->GetClassLinker()->FindArrayClass(soa.Self(), element_class);
   if (UNLIKELY(array_class == nullptr)) {
@@ -93,10 +89,6 @@
     ThrowNullPointerException(NULL, "element class == null");
     return nullptr;
   }
-  if (UNLIKELY(element_class->IsPrimitiveVoid())) {
-    ThrowIllegalArgumentException(NULL, "Can't allocate an array of void");
-    return nullptr;
-  }
   Runtime* runtime = Runtime::Current();
   mirror::Class* array_class = runtime->GetClassLinker()->FindArrayClass(soa.Self(), element_class);
   if (UNLIKELY(array_class == nullptr)) {
@@ -163,6 +155,21 @@
   return env->NewStringUTF(kIsDebugBuild ? "libartd.so" : "libart.so");
 }
 
+static jstring VMRuntime_vmInstructionSet(JNIEnv* env, jobject) {
+  InstructionSet isa = Runtime::Current()->GetInstructionSet();
+  const char* isa_string = GetInstructionSetString(isa);
+  return env->NewStringUTF(isa_string);
+}
+
+static jboolean VMRuntime_is64Bit(JNIEnv* env, jobject) {
+  bool is64BitMode = (sizeof(void*) == sizeof(uint64_t));
+  return is64BitMode ? JNI_TRUE : JNI_FALSE;
+}
+
+static jboolean VMRuntime_isCheckJniEnabled(JNIEnv* env, jobject) {
+  return Runtime::Current()->GetJavaVM()->check_jni ? JNI_TRUE : JNI_FALSE;
+}
+
 static void VMRuntime_setTargetSdkVersionNative(JNIEnv* env, jobject, jint targetSdkVersion) {
   // This is the target SDK version of the app we're about to run. It is intended that this is a place
   // where workarounds can be enabled.
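The three new natives expose the runtime's instruction set, pointer width and check-JNI state to managed code; they are registered further down with the other VMRuntime methods. The 64-bit probe is nothing more than a pointer-width comparison, as in this standalone illustration:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Same test as VMRuntime_is64Bit above: compare pointer width to uint64_t.
  const bool is64 = sizeof(void*) == sizeof(uint64_t);
  std::printf("pointer width: %zu bytes (%s)\n", sizeof(void*), is64 ? "64-bit" : "32-bit");
  return 0;
}
```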
@@ -214,7 +221,7 @@
 }
 
 // Based on ClassLinker::ResolveString.
-static void PreloadDexCachesResolveString(SirtRef<mirror::DexCache>& dex_cache, uint32_t string_idx,
+static void PreloadDexCachesResolveString(Handle<mirror::DexCache> dex_cache, uint32_t string_idx,
                                           StringTable& strings)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::String* string = dex_cache->GetResolvedString(string_idx);
@@ -260,8 +267,7 @@
 }
 
 // Based on ClassLinker::ResolveField.
-static void PreloadDexCachesResolveField(SirtRef<mirror::DexCache>& dex_cache,
-                                         uint32_t field_idx,
+static void PreloadDexCachesResolveField(Handle<mirror::DexCache> dex_cache, uint32_t field_idx,
                                          bool is_static)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = dex_cache->GetResolvedField(field_idx);
@@ -270,14 +276,16 @@
   }
   const DexFile* dex_file = dex_cache->GetDexFile();
   const DexFile::FieldId& field_id = dex_file->GetFieldId(field_idx);
-  mirror::Class* klass = dex_cache->GetResolvedType(field_id.class_idx_);
-  if (klass == NULL) {
+  Thread* const self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Class> klass(hs.NewHandle(dex_cache->GetResolvedType(field_id.class_idx_)));
+  if (klass.Get() == NULL) {
     return;
   }
   if (is_static) {
-    field = klass->FindStaticField(dex_cache.get(), field_idx);
+    field = mirror::Class::FindStaticField(self, klass, dex_cache.Get(), field_idx);
   } else {
-    field = klass->FindInstanceField(dex_cache.get(), field_idx);
+    field = klass->FindInstanceField(dex_cache.Get(), field_idx);
   }
   if (field == NULL) {
     return;
@@ -287,8 +295,7 @@
 }
 
 // Based on ClassLinker::ResolveMethod.
-static void PreloadDexCachesResolveMethod(SirtRef<mirror::DexCache>& dex_cache,
-                                          uint32_t method_idx,
+static void PreloadDexCachesResolveMethod(Handle<mirror::DexCache> dex_cache, uint32_t method_idx,
                                           InvokeType invoke_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtMethod* method = dex_cache->GetResolvedMethod(method_idx);
@@ -304,14 +311,14 @@
   switch (invoke_type) {
     case kDirect:
     case kStatic:
-      method = klass->FindDirectMethod(dex_cache.get(), method_idx);
+      method = klass->FindDirectMethod(dex_cache.Get(), method_idx);
       break;
     case kInterface:
-      method = klass->FindInterfaceMethod(dex_cache.get(), method_idx);
+      method = klass->FindInterfaceMethod(dex_cache.Get(), method_idx);
       break;
     case kSuper:
     case kVirtual:
-      method = klass->FindVirtualMethod(dex_cache.get(), method_idx);
+      method = klass->FindVirtualMethod(dex_cache.Get(), method_idx);
       break;
     default:
       LOG(FATAL) << "Unreachable - invocation type: " << invoke_type;
@@ -434,7 +441,8 @@
   for (size_t i = 0; i< boot_class_path.size(); i++) {
     const DexFile* dex_file = boot_class_path[i];
     CHECK(dex_file != NULL);
-    SirtRef<mirror::DexCache> dex_cache(self, linker->FindDexCache(*dex_file));
+    StackHandleScope<1> hs(self);
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(*dex_file)));
 
     if (kPreloadDexCachesStrings) {
       for (size_t i = 0; i < dex_cache->NumStrings(); i++) {
@@ -444,7 +452,7 @@
 
     if (kPreloadDexCachesTypes) {
       for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
-        PreloadDexCachesResolveType(dex_cache.get(), i);
+        PreloadDexCachesResolveType(dex_cache.Get(), i);
       }
     }
 
@@ -534,6 +542,9 @@
   NATIVE_METHOD(VMRuntime, trimHeap, "()V"),
   NATIVE_METHOD(VMRuntime, vmVersion, "()Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, vmLibrary, "()Ljava/lang/String;"),
+  NATIVE_METHOD(VMRuntime, vmInstructionSet, "()Ljava/lang/String;"),
+  NATIVE_METHOD(VMRuntime, is64Bit, "!()Z"),
+  NATIVE_METHOD(VMRuntime, isCheckJniEnabled, "!()Z"),
   NATIVE_METHOD(VMRuntime, preloadDexCaches, "()V"),
   NATIVE_METHOD(VMRuntime, registerAppInfo, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V"),
 };
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 1008491..7490e6a 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -58,12 +58,12 @@
     Runtime* runtime = Runtime::Current();
     JavaVMExt* vm = runtime->GetJavaVM();
     if (!vm->check_jni) {
-      LOG(DEBUG) << "Late-enabling -Xcheck:jni";
+      LOG(INFO) << "Late-enabling -Xcheck:jni";
       vm->SetCheckJniEnabled(true);
       // There's only one thread running at this point, so only one JNIEnv to fix up.
       Thread::Current()->GetJniEnv()->SetCheckJniEnabled(true);
     } else {
-      LOG(DEBUG) << "Not late-enabling -Xcheck:jni (already on)";
+      LOG(INFO) << "Not late-enabling -Xcheck:jni (already on)";
     }
     debug_flags &= ~DEBUG_ENABLE_CHECKJNI;
   }
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 6daf9a9..e619dda 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -62,12 +62,12 @@
   }
 
   std::string descriptor(DotToDescriptor(name.c_str()));
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
-                                            soa.Decode<mirror::ClassLoader*>(javaLoader));
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(soa.Decode<mirror::ClassLoader*>(javaLoader)));
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  SirtRef<mirror::Class> c(soa.Self(), class_linker->FindClass(soa.Self(), descriptor.c_str(),
-                                                               class_loader));
-  if (c.get() == nullptr) {
+  Handle<mirror::Class> c(
+      hs.NewHandle(class_linker->FindClass(soa.Self(), descriptor.c_str(), class_loader)));
+  if (c.Get() == nullptr) {
     ScopedLocalRef<jthrowable> cause(env, env->ExceptionOccurred());
     env->ExceptionClear();
     jthrowable cnfe = reinterpret_cast<jthrowable>(env->NewObject(WellKnownClasses::java_lang_ClassNotFoundException,
@@ -79,13 +79,14 @@
   if (initialize) {
     class_linker->EnsureInitialized(c, true, true);
   }
-  return soa.AddLocalReference<jclass>(c.get());
+  return soa.AddLocalReference<jclass>(c.Get());
 }
 
 static jstring Class_getNameNative(JNIEnv* env, jobject javaThis) {
   ScopedFastNativeObjectAccess soa(env);
-  mirror::Class* c = DecodeClass(soa, javaThis);
-  return soa.AddLocalReference<jstring>(c->ComputeName());
+  StackHandleScope<1> hs(soa.Self());
+  mirror::Class* const c = DecodeClass(soa, javaThis);
+  return soa.AddLocalReference<jstring>(mirror::Class::ComputeName(hs.NewHandle(c)));
 }
 
 static jobjectArray Class_getProxyInterfaces(JNIEnv* env, jobject javaThis) {
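This file shows the pattern that recurs throughout the change: raw mirror pointers are pinned in a StackHandleScope and passed around as Handle<T> instead of SirtRef<T>. The toy below mimics only the shape of that API (fixed capacity chosen at compile time, NewHandle(), Handle::Get()); the real classes live in handle_scope.h and act as GC-visible roots:

```cpp
#include <array>
#include <cassert>
#include <cstddef>

struct Obj { int payload; };

template <typename T>
class Handle {
 public:
  explicit Handle(T** slot) : slot_(slot) {}
  T* Get() const { return *slot_; }
  T* operator->() const { return *slot_; }
 private:
  T** slot_;  // Points into the scope, so a moving collector could update it in place.
};

template <std::size_t kCapacity>
class StackHandleScopeModel {
 public:
  template <typename T>
  Handle<T> NewHandle(T* object) {
    assert(used_ < kCapacity);
    slots_[used_] = object;
    return Handle<T>(reinterpret_cast<T**>(&slots_[used_++]));
  }
 private:
  std::array<void*, kCapacity> slots_{};
  std::size_t used_ = 0;
};

int main() {
  Obj obj{42};
  StackHandleScopeModel<1> hs;          // capacity is part of the type, as in the code above
  Handle<Obj> h = hs.NewHandle(&obj);   // the object is now reachable through the scope
  assert(h->payload == 42);             // Handle dereference goes through the slot
  return 0;
}
```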
diff --git a/runtime/native/java_lang_Runtime.cc b/runtime/native/java_lang_Runtime.cc
index 636be5d..496a1b2 100644
--- a/runtime/native/java_lang_Runtime.cc
+++ b/runtime/native/java_lang_Runtime.cc
@@ -19,12 +19,13 @@
 #include <unistd.h>
 
 #include "gc/heap.h"
+#include "handle_scope-inl.h"
 #include "jni_internal.h"
 #include "mirror/class_loader.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedUtfChars.h"
-#include "sirt_ref-inl.h"
+#include "verify_object-inl.h"
 
 namespace art {
 
@@ -65,8 +66,9 @@
   std::string detail;
   {
     ScopedObjectAccess soa(env);
-    SirtRef<mirror::ClassLoader> classLoader(soa.Self(),
-                                             soa.Decode<mirror::ClassLoader*>(javaLoader));
+    StackHandleScope<1> hs(soa.Self());
+    Handle<mirror::ClassLoader> classLoader(
+        hs.NewHandle(soa.Decode<mirror::ClassLoader*>(javaLoader)));
     JavaVMExt* vm = Runtime::Current()->GetJavaVM();
     bool success = vm->LoadNativeLibrary(filename.c_str(), classLoader, &detail);
     if (success) {
diff --git a/runtime/native/java_lang_VMClassLoader.cc b/runtime/native/java_lang_VMClassLoader.cc
index cb8e623..f2b8a03 100644
--- a/runtime/native/java_lang_VMClassLoader.cc
+++ b/runtime/native/java_lang_VMClassLoader.cc
@@ -73,12 +73,12 @@
   const DexFile* dex_file = path[index];
   const std::string& location(dex_file->GetLocation());
   std::string error_msg;
-  UniquePtr<ZipArchive> zip_archive(ZipArchive::Open(location.c_str(), &error_msg));
+  std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(location.c_str(), &error_msg));
   if (zip_archive.get() == nullptr) {
     LOG(WARNING) << "Failed to open zip archive '" << location << "': " << error_msg;
     return NULL;
   }
-  UniquePtr<ZipEntry> zip_entry(zip_archive->Find(name.c_str(), &error_msg));
+  std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(name.c_str(), &error_msg));
   if (zip_entry.get() == NULL) {
     return NULL;
   }
diff --git a/runtime/native/java_lang_ref_Reference.cc b/runtime/native/java_lang_ref_Reference.cc
new file mode 100644
index 0000000..f221ac6
--- /dev/null
+++ b/runtime/native/java_lang_ref_Reference.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gc/heap.h"
+#include "gc/reference_processor.h"
+#include "jni_internal.h"
+#include "mirror/object-inl.h"
+#include "mirror/reference-inl.h"
+#include "scoped_fast_native_object_access.h"
+
+namespace art {
+
+static jobject Reference_get(JNIEnv* env, jobject javaThis) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Reference* const ref = soa.Decode<mirror::Reference*>(javaThis);
+  mirror::Object* const referent =
+      Runtime::Current()->GetHeap()->GetReferenceProcessor()->GetReferent(soa.Self(), ref);
+  return soa.AddLocalReference<jobject>(referent);
+}
+
+static JNINativeMethod gMethods[] = {
+  NATIVE_METHOD(Reference, get, "!()Ljava/lang/Object;"),
+};
+
+void register_java_lang_ref_Reference(JNIEnv* env) {
+  REGISTER_NATIVE_METHODS("java/lang/ref/Reference");
+}
+
+}  // namespace art
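Reference.get is now a fast native that asks the heap's ReferenceProcessor for the referent instead of reading the field directly, so the runtime can intervene while references are being processed concurrently. The toy below only models that idea (readers block while processing is in progress); it is not the ART implementation:

```cpp
#include <condition_variable>
#include <mutex>

class ReferenceProcessorModel {
 public:
  // Readers go through here instead of loading the referent field themselves.
  void* GetReferent(void** referent_slot) {
    std::unique_lock<std::mutex> lock(lock_);
    cond_.wait(lock, [this] { return !processing_; });  // wait out concurrent processing
    return *referent_slot;
  }
  void StartProcessing() {
    std::lock_guard<std::mutex> lock(lock_);
    processing_ = true;
  }
  void FinishProcessing() {
    {
      std::lock_guard<std::mutex> lock(lock_);
      processing_ = false;
    }
    cond_.notify_all();
  }

 private:
  std::mutex lock_;
  std::condition_variable cond_;
  bool processing_ = false;
};
```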
diff --git a/runtime/native/java_lang_reflect_Array.cc b/runtime/native/java_lang_reflect_Array.cc
index a991818..db77437 100644
--- a/runtime/native/java_lang_reflect_Array.cc
+++ b/runtime/native/java_lang_reflect_Array.cc
@@ -22,21 +22,22 @@
 #include "mirror/object-inl.h"
 #include "object_utils.h"
 #include "scoped_fast_native_object_access.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 
 namespace art {
 
 static jobject Array_createMultiArray(JNIEnv* env, jclass, jclass javaElementClass, jobject javaDimArray) {
   ScopedFastNativeObjectAccess soa(env);
   DCHECK(javaElementClass != NULL);
-  SirtRef<mirror::Class> element_class(soa.Self(), soa.Decode<mirror::Class*>(javaElementClass));
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::Class> element_class(hs.NewHandle(soa.Decode<mirror::Class*>(javaElementClass)));
   DCHECK(element_class->IsClass());
   DCHECK(javaDimArray != NULL);
   mirror::Object* dimensions_obj = soa.Decode<mirror::Object*>(javaDimArray);
   DCHECK(dimensions_obj->IsArrayInstance());
-  DCHECK_STREQ(ClassHelper(dimensions_obj->GetClass()).GetDescriptor(), "[I");
-  SirtRef<mirror::IntArray> dimensions_array(soa.Self(),
-                                             down_cast<mirror::IntArray*>(dimensions_obj));
+  DCHECK_STREQ(dimensions_obj->GetClass()->GetDescriptor().c_str(), "[I");
+  Handle<mirror::IntArray> dimensions_array(
+      hs.NewHandle(down_cast<mirror::IntArray*>(dimensions_obj)));
   mirror::Array* new_array = mirror::Array::CreateMultiArray(soa.Self(), element_class,
                                                              dimensions_array);
   return soa.AddLocalReference<jobject>(new_array);
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index 2445b53..1981bfd 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -38,13 +38,14 @@
                                        jboolean accessible) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::ArtMethod* m = mirror::ArtMethod::FromReflectedMethod(soa, javaMethod);
-  SirtRef<mirror::Class> c(soa.Self(), m->GetDeclaringClass());
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::Class> c(hs.NewHandle(m->GetDeclaringClass()));
   if (UNLIKELY(c->IsAbstract())) {
     ThrowLocation throw_location = soa.Self()->GetCurrentLocationForThrow();
     soa.Self()->ThrowNewExceptionF(throw_location, "Ljava/lang/InstantiationException;",
                                    "Can't instantiate %s %s",
                                    c->IsInterface() ? "interface" : "abstract class",
-                                   PrettyDescriptor(c.get()).c_str());
+                                   PrettyDescriptor(c.Get()).c_str());
     return nullptr;
   }
 
diff --git a/runtime/native/java_lang_reflect_Field.cc b/runtime/native/java_lang_reflect_Field.cc
index ce622d9..0d54772 100644
--- a/runtime/native/java_lang_reflect_Field.cc
+++ b/runtime/native/java_lang_reflect_Field.cc
@@ -94,13 +94,14 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   soa.Self()->AssertThreadSuspensionIsAllowable();
   if (f->IsStatic()) {
-    SirtRef<mirror::Class> sirt_klass(soa.Self(), f->GetDeclaringClass());
-    if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(sirt_klass, true, true))) {
+    StackHandleScope<1> hs(soa.Self());
+    Handle<mirror::Class> h_klass(hs.NewHandle(f->GetDeclaringClass()));
+    if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(h_klass, true, true))) {
       DCHECK(soa.Self()->IsExceptionPending());
       *class_or_rcvr = nullptr;
       return false;
     }
-    *class_or_rcvr = sirt_klass.get();
+    *class_or_rcvr = h_klass.Get();
     return true;
   }
 
@@ -271,7 +272,8 @@
     const char* field_type_desciptor = fh.GetTypeDescriptor();
     field_prim_type = Primitive::GetType(field_type_desciptor[0]);
     if (field_prim_type == Primitive::kPrimNot) {
-      SirtRef<mirror::Object> sirt_obj(soa.Self(), o);
+      StackHandleScope<1> hs(soa.Self());
+      HandleWrapper<mirror::Object> h(hs.NewHandleWrapper(&o));
       // May cause resolution.
       CHECK(!kMovingFields) << "Resolution may trigger thread suspension";
       field_type = fh.GetType(true);
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
index 5d90f1a..e17e60a 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
@@ -52,9 +52,15 @@
     jobject internal_trace = self->CreateInternalStackTrace<false>(soa);
     trace = Thread::InternalStackTraceToStackTraceElementArray(soa, internal_trace);
   } else {
-    // Suspend thread to build stack trace.
     ThreadList* thread_list = Runtime::Current()->GetThreadList();
     bool timed_out;
+
+    // Check for a valid thread id.
+    if (thin_lock_id == ThreadList::kInvalidThreadId) {
+      return nullptr;
+    }
+
+    // Suspend thread to build stack trace.
     Thread* thread = thread_list->SuspendThreadByThreadId(thin_lock_id, false, &timed_out);
     if (thread != nullptr) {
       {
diff --git a/runtime/native/scoped_fast_native_object_access.h b/runtime/native/scoped_fast_native_object_access.h
index 744ac05..822aefa 100644
--- a/runtime/native/scoped_fast_native_object_access.h
+++ b/runtime/native/scoped_fast_native_object_access.h
@@ -24,12 +24,12 @@
 
 // Variant of ScopedObjectAccess that does no runnable transitions. Should only be used by "fast"
 // JNI methods.
-class ScopedFastNativeObjectAccess : public ScopedObjectAccess {
+class ScopedFastNativeObjectAccess : public ScopedObjectAccessAlreadyRunnable {
  public:
   explicit ScopedFastNativeObjectAccess(JNIEnv* env)
     LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
     SHARED_LOCK_FUNCTION(Locks::mutator_lock_) ALWAYS_INLINE
-     : ScopedObjectAccess(env) {
+     : ScopedObjectAccessAlreadyRunnable(env) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK((*Self()->GetManagedStack()->GetTopQuickFrame())->IsFastNative());
     // Don't work with raw objects in non-runnable states.
diff --git a/runtime/oat.cc b/runtime/oat.cc
index c1a48e9..10d335e 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,7 +22,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '2', '2', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '2', '9', '\0' };
 
 OatHeader::OatHeader() {
   memset(this, 0, sizeof(*this));
@@ -345,40 +345,34 @@
 
 OatMethodOffsets::OatMethodOffsets()
   : code_offset_(0),
-    frame_size_in_bytes_(0),
-    core_spill_mask_(0),
-    fp_spill_mask_(0),
     gc_map_offset_(0)
 {}
 
 OatMethodOffsets::OatMethodOffsets(uint32_t code_offset,
-                                   uint32_t frame_size_in_bytes,
-                                   uint32_t core_spill_mask,
-                                   uint32_t fp_spill_mask,
                                    uint32_t gc_map_offset
                                    )
   : code_offset_(code_offset),
-    frame_size_in_bytes_(frame_size_in_bytes),
-    core_spill_mask_(core_spill_mask),
-    fp_spill_mask_(fp_spill_mask),
     gc_map_offset_(gc_map_offset)
 {}
 
 OatMethodOffsets::~OatMethodOffsets() {}
 
-OatMethodHeader::OatMethodHeader()
+OatQuickMethodHeader::OatQuickMethodHeader()
   : mapping_table_offset_(0),
     vmap_table_offset_(0),
+    frame_info_(0, 0, 0),
     code_size_(0)
 {}
 
-OatMethodHeader::OatMethodHeader(uint32_t vmap_table_offset, uint32_t mapping_table_offset,
-                                 uint32_t code_size)
+OatQuickMethodHeader::OatQuickMethodHeader(
+    uint32_t mapping_table_offset, uint32_t vmap_table_offset, uint32_t frame_size_in_bytes,
+    uint32_t core_spill_mask, uint32_t fp_spill_mask, uint32_t code_size)
   : mapping_table_offset_(mapping_table_offset),
     vmap_table_offset_(vmap_table_offset),
+    frame_info_(frame_size_in_bytes, core_spill_mask, fp_spill_mask),
     code_size_(code_size)
 {}
 
-OatMethodHeader::~OatMethodHeader() {}
+OatQuickMethodHeader::~OatQuickMethodHeader() {}
 
 }  // namespace art
diff --git a/runtime/oat.h b/runtime/oat.h
index e9dfae9..7be768c 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -22,6 +22,7 @@
 #include "base/macros.h"
 #include "dex_file.h"
 #include "instruction_set.h"
+#include "quick/quick_method_frame_info.h"
 
 namespace art {
 
@@ -137,34 +138,31 @@
   OatMethodOffsets();
 
   OatMethodOffsets(uint32_t code_offset,
-                   uint32_t frame_size_in_bytes,
-                   uint32_t core_spill_mask,
-                   uint32_t fp_spill_mask,
                    uint32_t gc_map_offset);
 
   ~OatMethodOffsets();
 
   uint32_t code_offset_;
-  uint32_t frame_size_in_bytes_;
-  uint32_t core_spill_mask_;
-  uint32_t fp_spill_mask_;
   uint32_t gc_map_offset_;
 };
 
-// OatMethodHeader precedes the raw code chunk generated by the Quick compiler.
-class PACKED(4) OatMethodHeader {
+// OatQuickMethodHeader precedes the raw code chunk generated by the Quick compiler.
+class PACKED(4) OatQuickMethodHeader {
  public:
-  OatMethodHeader();
+  OatQuickMethodHeader();
 
-  explicit OatMethodHeader(uint32_t mapping_table_offset, uint32_t vmap_table_offset,
-                           uint32_t code_size);
+  explicit OatQuickMethodHeader(uint32_t mapping_table_offset, uint32_t vmap_table_offset,
+                                uint32_t frame_size_in_bytes, uint32_t core_spill_mask,
+                                uint32_t fp_spill_mask, uint32_t code_size);
 
-  ~OatMethodHeader();
+  ~OatQuickMethodHeader();
 
   // The offset in bytes from the start of the mapping table to the end of the header.
   uint32_t mapping_table_offset_;
   // The offset in bytes from the start of the vmap table to the end of the header.
   uint32_t vmap_table_offset_;
+  // The stack frame information.
+  QuickMethodFrameInfo frame_info_;
   // The code size in bytes.
   uint32_t code_size_;
 };
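With the spill masks and frame size gone from OatMethodOffsets, they can only be recovered from the OatQuickMethodHeader emitted immediately before each method's code, which is what the new getters in oat_file-inl.h do by indexing [-1] from the code pointer. A self-contained illustration of that layout, with simplified stand-in structs:

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

struct FrameInfoModel {
  uint32_t frame_size_in_bytes;
  uint32_t core_spill_mask;
  uint32_t fp_spill_mask;
};

struct QuickMethodHeaderModel {
  uint32_t mapping_table_offset;
  uint32_t vmap_table_offset;
  FrameInfoModel frame_info;
  uint32_t code_size;
};

int main() {
  // Emulate the oat layout: [header][code ...], where the code pointer is the entry point.
  alignas(alignof(QuickMethodHeaderModel)) uint8_t region[sizeof(QuickMethodHeaderModel) + 16] = {};
  QuickMethodHeaderModel header = {0, 0, {64, 0x4DE0, 0}, 16};
  std::memcpy(region, &header, sizeof(header));
  const uint8_t* code = region + sizeof(QuickMethodHeaderModel);

  // Walk back one header from the code pointer, as GetFrameSizeInBytes() etc. do.
  const QuickMethodHeaderModel* recovered =
      reinterpret_cast<const QuickMethodHeaderModel*>(code) - 1;
  assert(recovered->frame_info.frame_size_in_bytes == 64);
  assert(recovered->code_size == 16);
  return 0;
}
```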
diff --git a/runtime/oat_file-inl.h b/runtime/oat_file-inl.h
index 00ae797..97ca6b2 100644
--- a/runtime/oat_file-inl.h
+++ b/runtime/oat_file-inl.h
@@ -21,6 +21,30 @@
 
 namespace art {
 
+inline size_t OatFile::OatMethod::GetFrameSizeInBytes() const {
+  const void* code = mirror::ArtMethod::EntryPointToCodePointer(GetQuickCode());
+  if (code == nullptr) {
+    return 0u;
+  }
+  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.FrameSizeInBytes();
+}
+
+inline uint32_t OatFile::OatMethod::GetCoreSpillMask() const {
+  const void* code = mirror::ArtMethod::EntryPointToCodePointer(GetQuickCode());
+  if (code == nullptr) {
+    return 0u;
+  }
+  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.CoreSpillMask();
+}
+
+inline uint32_t OatFile::OatMethod::GetFpSpillMask() const {
+  const void* code = mirror::ArtMethod::EntryPointToCodePointer(GetQuickCode());
+  if (code == nullptr) {
+    return 0u;
+  }
+  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.FpSpillMask();
+}
+
 inline uint32_t OatFile::OatMethod::GetMappingTableOffset() const {
   const uint8_t* mapping_table = GetMappingTable();
   return static_cast<uint32_t>(mapping_table != nullptr ? mapping_table - begin_ : 0u);
@@ -36,7 +60,7 @@
   if (code == nullptr) {
     return nullptr;
   }
-  uint32_t offset = reinterpret_cast<const OatMethodHeader*>(code)[-1].mapping_table_offset_;
+  uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].mapping_table_offset_;
   if (UNLIKELY(offset == 0u)) {
     return nullptr;
   }
@@ -48,7 +72,7 @@
   if (code == nullptr) {
     return nullptr;
   }
-  uint32_t offset = reinterpret_cast<const OatMethodHeader*>(code)[-1].vmap_table_offset_;
+  uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].vmap_table_offset_;
   if (UNLIKELY(offset == 0u)) {
     return nullptr;
   }
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 56e1f05..74dfe91 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -33,17 +33,6 @@
 
 namespace art {
 
-std::string OatFile::DexFilenameToOdexFilename(const std::string& location) {
-  CHECK_GE(location.size(), 4U) << location;  // must be at least .123
-  size_t dot_index = location.size() - 3 - 1;  // 3=dex or zip or apk
-  CHECK_EQ('.', location[dot_index]) << location;
-  std::string odex_location(location);
-  odex_location.resize(dot_index + 1);
-  CHECK_EQ('.', odex_location[odex_location.size()-1]) << location << " " << odex_location;
-  odex_location += "odex";
-  return odex_location;
-}
-
 void OatFile::CheckLocation(const std::string& location) {
   CHECK(!location.empty());
 }
@@ -53,7 +42,7 @@
                              std::string* error_msg) {
   CHECK(!oat_contents.empty()) << location;
   CheckLocation(location);
-  UniquePtr<OatFile> oat_file(new OatFile(location));
+  std::unique_ptr<OatFile> oat_file(new OatFile(location));
   oat_file->begin_ = &oat_contents[0];
   oat_file->end_ = &oat_contents[oat_contents.size()];
   return oat_file->Setup(error_msg) ? oat_file.release() : nullptr;
@@ -82,7 +71,7 @@
   //
   // On host, dlopen is expected to fail when cross compiling, so fall back to OpenElfFile.
   // This won't work for portable runtime execution because it doesn't process relocations.
-  UniquePtr<File> file(OS::OpenFileForReading(filename.c_str()));
+  std::unique_ptr<File> file(OS::OpenFileForReading(filename.c_str()));
   if (file.get() == NULL) {
     *error_msg = StringPrintf("Failed to open oat filename for reading: %s", strerror(errno));
     return NULL;
@@ -99,7 +88,7 @@
                              const std::string& location,
                              byte* requested_base,
                              std::string* error_msg) {
-  UniquePtr<OatFile> oat_file(new OatFile(location));
+  std::unique_ptr<OatFile> oat_file(new OatFile(location));
   bool success = oat_file->Dlopen(elf_filename, requested_base, error_msg);
   if (!success) {
     return nullptr;
@@ -113,7 +102,7 @@
                               bool writable,
                               bool executable,
                               std::string* error_msg) {
-  UniquePtr<OatFile> oat_file(new OatFile(location));
+  std::unique_ptr<OatFile> oat_file(new OatFile(location));
   bool success = oat_file->ElfFileOpen(file, requested_base, writable, executable, error_msg);
   if (!success) {
     CHECK(!error_msg->empty());
@@ -464,7 +453,7 @@
   // NOTE: We don't keep the number of methods and cannot do a bounds check for method_index.
   if (methods_pointer_ == NULL) {
     CHECK_EQ(kOatClassNoneCompiled, type_);
-    return OatMethod(NULL, 0, 0, 0, 0, 0);
+    return OatMethod(NULL, 0, 0);
   }
   size_t methods_pointer_index;
   if (bitmap_ == NULL) {
@@ -473,7 +462,7 @@
   } else {
     CHECK_EQ(kOatClassSomeCompiled, type_);
     if (!BitVector::IsBitSet(bitmap_, method_index)) {
-      return OatMethod(NULL, 0, 0, 0, 0, 0);
+      return OatMethod(NULL, 0, 0);
     }
     size_t num_set_bits = BitVector::NumSetBits(bitmap_, method_index);
     methods_pointer_index = num_set_bits;
@@ -482,23 +471,14 @@
   return OatMethod(
       oat_file_->Begin(),
       oat_method_offsets.code_offset_,
-      oat_method_offsets.frame_size_in_bytes_,
-      oat_method_offsets.core_spill_mask_,
-      oat_method_offsets.fp_spill_mask_,
       oat_method_offsets.gc_map_offset_);
 }
 
 OatFile::OatMethod::OatMethod(const byte* base,
                               const uint32_t code_offset,
-                              const size_t frame_size_in_bytes,
-                              const uint32_t core_spill_mask,
-                              const uint32_t fp_spill_mask,
                               const uint32_t gc_map_offset)
   : begin_(base),
     code_offset_(code_offset),
-    frame_size_in_bytes_(frame_size_in_bytes),
-    core_spill_mask_(core_spill_mask),
-    fp_spill_mask_(fp_spill_mask),
     native_gc_map_offset_(gc_map_offset) {
 }
 
@@ -519,9 +499,6 @@
   CHECK(method != NULL);
   method->SetEntryPointFromPortableCompiledCode(GetPortableCode());
   method->SetEntryPointFromQuickCompiledCode(GetQuickCode());
-  method->SetFrameSizeInBytes(frame_size_in_bytes_);
-  method->SetCoreSpillMask(core_spill_mask_);
-  method->SetFpSpillMask(fp_spill_mask_);
   method->SetNativeGcMap(GetNativeGcMap());  // Used by native methods in work around JNI mode.
 }
 
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index b358a00..d703731 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -37,10 +37,6 @@
 
 class OatFile {
  public:
-  // Returns an .odex file name next adjacent to the dex location.
-  // For example, for "/foo/bar/baz.jar", return "/foo/bar/baz.odex".
-  static std::string DexFilenameToOdexFilename(const std::string& location);
-
   // Open an oat file. Returns NULL on failure.  Requested base can
   // optionally be used to request where the file should be loaded.
   static OatFile* Open(const std::string& filename,
@@ -78,15 +74,6 @@
     uint32_t GetCodeOffset() const {
       return code_offset_;
     }
-    size_t GetFrameSizeInBytes() const {
-      return frame_size_in_bytes_;
-    }
-    uint32_t GetCoreSpillMask() const {
-      return core_spill_mask_;
-    }
-    uint32_t GetFpSpillMask() const {
-      return fp_spill_mask_;
-    }
     uint32_t GetNativeGcMapOffset() const {
       return native_gc_map_offset_;
     }
@@ -120,6 +107,9 @@
       return GetOatPointer<const uint8_t*>(native_gc_map_offset_);
     }
 
+    size_t GetFrameSizeInBytes() const;
+    uint32_t GetCoreSpillMask() const;
+    uint32_t GetFpSpillMask() const;
     uint32_t GetMappingTableOffset() const;
     uint32_t GetVmapTableOffset() const;
     const uint8_t* GetMappingTable() const;
@@ -130,9 +120,6 @@
     // Create an OatMethod with offsets relative to the given base address
     OatMethod(const byte* base,
               const uint32_t code_offset,
-              const size_t frame_size_in_bytes,
-              const uint32_t core_spill_mask,
-              const uint32_t fp_spill_mask,
               const uint32_t gc_map_offset);
 
    private:
@@ -147,9 +134,6 @@
     const byte* begin_;
 
     uint32_t code_offset_;
-    size_t frame_size_in_bytes_;
-    uint32_t core_spill_mask_;
-    uint32_t fp_spill_mask_;
     uint32_t native_gc_map_offset_;
 
     friend class OatClass;
@@ -165,7 +149,7 @@
       return type_;
     }
 
-    // get the OatMethod entry based on its index into the class
+    // Get the OatMethod entry based on its index into the class
     // definition. Direct methods come first, followed by virtual
     // methods. Note that runtime-created methods such as Miranda
     // methods are not included.
@@ -278,10 +262,10 @@
   const byte* end_;
 
   // Backing memory map for oat file when opened by ElfWriter during initial compilation.
-  UniquePtr<MemMap> mem_map_;
+  std::unique_ptr<MemMap> mem_map_;
 
   // Backing memory map for oat file during cross compilation.
-  UniquePtr<ElfFile> elf_file_;
+  std::unique_ptr<ElfFile> elf_file_;
 
   // dlopen handle during runtime.
   void* dlopen_handle_;
diff --git a/runtime/object_callbacks.h b/runtime/object_callbacks.h
index 767c197..dd8ce16 100644
--- a/runtime/object_callbacks.h
+++ b/runtime/object_callbacks.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_OBJECT_CALLBACKS_H_
 #define ART_RUNTIME_OBJECT_CALLBACKS_H_
 
+// For ostream.
+#include <ostream>
 // For uint32_t.
 #include <stdint.h>
 // For size_t.
@@ -46,6 +48,7 @@
   kRootVMInternal,
   kRootJNIMonitor,
 };
+std::ostream& operator<<(std::ostream& os, const RootType& root_type);
 
 // Returns the new address of the object, returns root if it has not moved. tid and root_type are
 // only used by hprof.
diff --git a/runtime/object_utils.h b/runtime/object_utils.h
index 072f074..664ac89 100644
--- a/runtime/object_utils.h
+++ b/runtime/object_utils.h
@@ -29,7 +29,7 @@
 #include "mirror/string.h"
 
 #include "runtime.h"
-#include "sirt_ref-inl.h"
+#include "handle_scope-inl.h"
 
 #include <string>
 
@@ -38,201 +38,34 @@
 template <typename T>
 class ObjectLock {
  public:
-  explicit ObjectLock(Thread* self, const SirtRef<T>* object)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+  ObjectLock(Thread* self, Handle<T> object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : self_(self), obj_(object) {
-    CHECK(object != nullptr);
-    CHECK(object->get() != nullptr);
-    obj_->get()->MonitorEnter(self_);
+    CHECK(object.Get() != nullptr);
+    obj_->MonitorEnter(self_);
   }
 
   ~ObjectLock() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    obj_->get()->MonitorExit(self_);
+    obj_->MonitorExit(self_);
   }
 
   void WaitIgnoringInterrupts() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Monitor::Wait(self_, obj_->get(), 0, 0, false, kWaiting);
+    Monitor::Wait(self_, obj_.Get(), 0, 0, false, kWaiting);
   }
 
   void Notify() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    obj_->get()->Notify(self_);
+    obj_->Notify(self_);
   }
 
   void NotifyAll() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    obj_->get()->NotifyAll(self_);
+    obj_->NotifyAll(self_);
   }
 
  private:
   Thread* const self_;
-  const SirtRef<T>* const obj_;
+  Handle<T> const obj_;
   DISALLOW_COPY_AND_ASSIGN(ObjectLock);
 };
 
-class ClassHelper {
- public:
-  explicit ClassHelper(mirror::Class* c )
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : interface_type_list_(nullptr), klass_(nullptr) {
-    if (c != nullptr) {
-      ChangeClass(c);
-    }
-  }
-
-  void ChangeClass(mirror::Class* new_c)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK(new_c != nullptr) << "klass_=" << klass_;  // Log what we were changing from if any
-    if (!new_c->IsClass()) {
-      LOG(FATAL) << "new_c=" << new_c << " cc " << new_c->GetClass() << " ccc "
-          << ((new_c->GetClass() != nullptr) ? new_c->GetClass()->GetClass() : nullptr);
-    }
-    klass_ = new_c;
-    interface_type_list_ = nullptr;
-  }
-
-  // The returned const char* is only guaranteed to be valid for the lifetime of the ClassHelper.
-  // If you need it longer, copy it into a std::string.
-  const char* GetDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK(klass_ != nullptr);
-    if (UNLIKELY(klass_->IsArrayClass())) {
-      return GetArrayDescriptor();
-    } else if (UNLIKELY(klass_->IsPrimitive())) {
-      return Primitive::Descriptor(klass_->GetPrimitiveType());
-    } else if (UNLIKELY(klass_->IsProxyClass())) {
-      descriptor_ = GetClassLinker()->GetDescriptorForProxy(klass_);
-      return descriptor_.c_str();
-    } else {
-      const DexFile& dex_file = GetDexFile();
-      const DexFile::TypeId& type_id = dex_file.GetTypeId(GetClassDef()->class_idx_);
-      return dex_file.GetTypeDescriptor(type_id);
-    }
-  }
-
-  const char* GetArrayDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    std::string result("[");
-    mirror::Class* saved_klass = klass_;
-    CHECK(saved_klass != nullptr);
-    ChangeClass(klass_->GetComponentType());
-    result += GetDescriptor();
-    ChangeClass(saved_klass);
-    descriptor_ = result;
-    return descriptor_.c_str();
-  }
-
-  const DexFile::ClassDef* GetClassDef() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(klass_ != nullptr);
-    uint16_t class_def_idx = klass_->GetDexClassDefIndex();
-    if (class_def_idx == DexFile::kDexNoIndex16) {
-      return nullptr;
-    }
-    return &GetDexFile().GetClassDef(class_def_idx);
-  }
-
-  uint32_t NumDirectInterfaces() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(klass_ != nullptr);
-    if (klass_->IsPrimitive()) {
-      return 0;
-    } else if (klass_->IsArrayClass()) {
-      return 2;
-    } else if (klass_->IsProxyClass()) {
-      mirror::SynthesizedProxyClass* proxyClass = reinterpret_cast<mirror::SynthesizedProxyClass*>(klass_);
-      mirror::ObjectArray<mirror::Class>* interfaces = proxyClass->GetInterfaces();
-      return interfaces != nullptr ? interfaces->GetLength() : 0;
-    } else {
-      const DexFile::TypeList* interfaces = GetInterfaceTypeList();
-      if (interfaces == nullptr) {
-        return 0;
-      } else {
-        return interfaces->Size();
-      }
-    }
-  }
-
-  uint16_t GetDirectInterfaceTypeIdx(uint32_t idx)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(klass_ != nullptr);
-    DCHECK(!klass_->IsPrimitive());
-    DCHECK(!klass_->IsArrayClass());
-    return GetInterfaceTypeList()->GetTypeItem(idx).type_idx_;
-  }
-
-  mirror::Class* GetDirectInterface(uint32_t idx)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(klass_ != nullptr);
-    DCHECK(!klass_->IsPrimitive());
-    if (klass_->IsArrayClass()) {
-      if (idx == 0) {
-        return GetClassLinker()->FindSystemClass(Thread::Current(), "Ljava/lang/Cloneable;");
-      } else {
-        DCHECK_EQ(1U, idx);
-        return GetClassLinker()->FindSystemClass(Thread::Current(), "Ljava/io/Serializable;");
-      }
-    } else if (klass_->IsProxyClass()) {
-      mirror::SynthesizedProxyClass* proxyClass = reinterpret_cast<mirror::SynthesizedProxyClass*>(klass_);
-      mirror::ObjectArray<mirror::Class>* interfaces = proxyClass->GetInterfaces();
-      DCHECK(interfaces != nullptr);
-      return interfaces->Get(idx);
-    } else {
-      uint16_t type_idx = GetDirectInterfaceTypeIdx(idx);
-      mirror::Class* interface = GetDexCache()->GetResolvedType(type_idx);
-      if (interface == nullptr) {
-        interface = GetClassLinker()->ResolveType(GetDexFile(), type_idx, klass_);
-        CHECK(interface != nullptr || Thread::Current()->IsExceptionPending());
-      }
-      return interface;
-    }
-  }
-
-  const char* GetSourceFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    std::string descriptor(GetDescriptor());
-    const DexFile& dex_file = GetDexFile();
-    const DexFile::ClassDef* dex_class_def = GetClassDef();
-    CHECK(dex_class_def != nullptr) << "No class def for class " << PrettyClass(klass_);
-    return dex_file.GetSourceFile(*dex_class_def);
-  }
-
-  std::string GetLocation() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::DexCache* dex_cache = GetDexCache();
-    if (dex_cache != nullptr && !klass_->IsProxyClass()) {
-      return dex_cache->GetLocation()->ToModifiedUtf8();
-    } else {
-      // Arrays and proxies are generated and have no corresponding dex file location.
-      return "generated class";
-    }
-  }
-
-  const DexFile& GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return *GetDexCache()->GetDexFile();
-  }
-
-  mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return klass_->GetDexCache();
-  }
-
- private:
-  const DexFile::TypeList* GetInterfaceTypeList()
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile::TypeList* result = interface_type_list_;
-    if (result == nullptr) {
-      const DexFile::ClassDef* class_def = GetClassDef();
-      if (class_def != nullptr) {
-        result =  GetDexFile().GetInterfacesList(*class_def);
-        interface_type_list_ = result;
-      }
-    }
-    return result;
-  }
-
-  ClassLinker* GetClassLinker() ALWAYS_INLINE {
-    return Runtime::Current()->GetClassLinker();
-  }
-
-  const DexFile::TypeList* interface_type_list_;
-  mirror::Class* klass_;
-  std::string descriptor_;
-
-  DISALLOW_COPY_AND_ASSIGN(ClassHelper);
-};
-
 class FieldHelper {
  public:
   FieldHelper() : field_(nullptr) {}
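ObjectLock now takes its Handle by value instead of a SirtRef pointer. A usage sketch against the same API; the enclosing function, raw_obj and the exact includes (object_utils.h, handle_scope-inl.h, thread.h) are assumptions, not code from this change:

```cpp
void WaitOnObject(Thread* self, mirror::Object* raw_obj)
    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
  StackHandleScope<1> hs(self);
  Handle<mirror::Object> obj(hs.NewHandle(raw_obj));
  ObjectLock<mirror::Object> lock(self, obj);  // MonitorEnter in the constructor.
  lock.WaitIgnoringInterrupts();               // Monitor::Wait on the handled object.
}                                              // MonitorExit in the destructor.
```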
@@ -306,8 +139,7 @@
       DCHECK(field_->IsStatic());
       DCHECK_LT(field_index, 2U);
       // 0 == Class[] interfaces; 1 == Class[][] throws;
-      ClassHelper kh(field_->GetDeclaringClass());
-      declaring_class_descriptor_ = kh.GetDescriptor();
+      declaring_class_descriptor_ = field_->GetDeclaringClass()->GetDescriptor();
       return declaring_class_descriptor_.c_str();
     }
     const DexFile& dex_file = GetDexFile();
@@ -378,7 +210,8 @@
     const DexFile& dex_file = GetDexFile();
     uint32_t dex_method_idx = method_->GetDexMethodIndex();
     const DexFile::MethodId& method_id = dex_file.GetMethodId(dex_method_idx);
-    SirtRef<mirror::DexCache> dex_cache(Thread::Current(), GetDexCache());
+    StackHandleScope<1> hs(Thread::Current());
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(GetDexCache()));
     return GetClassLinker()->ResolveString(dex_file, method_id.name_idx_, dex_cache);
   }
 
@@ -469,7 +302,7 @@
   }
 
   const char* GetDeclaringClassSourceFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return ClassHelper(method_->GetDeclaringClass()).GetSourceFile();
+    return method_->GetDeclaringClass()->GetSourceFile();
   }
 
   uint16_t GetClassDefIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -520,8 +353,7 @@
     return GetParamPrimitiveType(param) == Primitive::kPrimNot;
   }
 
-  bool HasSameNameAndSignature(MethodHelper* other)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool HasSameNameAndSignature(MethodHelper* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const DexFile& dex_file = GetDexFile();
     const DexFile::MethodId& mid = dex_file.GetMethodId(method_->GetDexMethodIndex());
     if (GetDexCache() == other->GetDexCache()) {
@@ -539,6 +371,33 @@
     return dex_file.GetMethodSignature(mid) == other_dex_file.GetMethodSignature(other_mid);
   }
 
+  bool HasSameSignatureWithDifferentClassLoaders(MethodHelper* other)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (UNLIKELY(GetReturnType() != other->GetReturnType())) {
+      return false;
+    }
+    const DexFile::TypeList* types = GetParameterTypeList();
+    const DexFile::TypeList* other_types = other->GetParameterTypeList();
+    if (types == nullptr) {
+      return (other_types == nullptr) || (other_types->Size() == 0);
+    } else if (UNLIKELY(other_types == nullptr)) {
+      return types->Size() == 0;
+    }
+    uint32_t num_types = types->Size();
+    if (UNLIKELY(num_types != other_types->Size())) {
+      return false;
+    }
+    for (uint32_t i = 0; i < num_types; ++i) {
+      mirror::Class* param_type = GetClassFromTypeIdx(types->GetTypeItem(i).type_idx_);
+      mirror::Class* other_param_type =
+          other->GetClassFromTypeIdx(other_types->GetTypeItem(i).type_idx_);
+      if (UNLIKELY(param_type != other_param_type)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   const DexFile::CodeItem* GetCodeItem()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetDexFile().GetCodeItem(method_->GetCodeItemOffset());
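HasSameNameAndSignature compares descriptors through the dex files, which is only meaningful within a single class loader; the new HasSameSignatureWithDifferentClassLoaders instead compares the resolved return and parameter classes, since the same descriptor can resolve to different classes under different loaders. A standalone model of that distinction (toy types, not ART's):

```cpp
#include <cstddef>
#include <vector>

struct ClassModel {};  // stands in for a resolved mirror::Class

struct MethodModel {
  const ClassModel* return_type;
  std::vector<const ClassModel*> param_types;
};

static bool SameSignatureAcrossLoaders(const MethodModel& a, const MethodModel& b) {
  if (a.return_type != b.return_type) {
    return false;  // Identity of the resolved class, not its name, must match.
  }
  if (a.param_types.size() != b.param_types.size()) {
    return false;
  }
  for (std::size_t i = 0; i < a.param_types.size(); ++i) {
    if (a.param_types[i] != b.param_types[i]) {
      return false;
    }
  }
  return true;
}
```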
@@ -581,7 +440,8 @@
   mirror::String* ResolveString(uint32_t string_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::String* s = method_->GetDexCacheStrings()->Get(string_idx);
     if (UNLIKELY(s == nullptr)) {
-      SirtRef<mirror::DexCache> dex_cache(Thread::Current(), GetDexCache());
+      StackHandleScope<1> hs(Thread::Current());
+      Handle<mirror::DexCache> dex_cache(hs.NewHandle(GetDexCache()));
       s = GetClassLinker()->ResolveString(GetDexFile(), string_idx, dex_cache);
     }
     return s;
diff --git a/runtime/os_linux.cc b/runtime/os_linux.cc
index 7ce17e0..e4403d7 100644
--- a/runtime/os_linux.cc
+++ b/runtime/os_linux.cc
@@ -16,14 +16,14 @@
 
 #include "os.h"
 
-#include <cstddef>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
+#include <cstddef>
+#include <memory>
 
 #include "base/logging.h"
 #include "base/unix_file/fd_file.h"
-#include "UniquePtr.h"
 
 namespace art {
 
@@ -41,7 +41,7 @@
 
 File* OS::OpenFileWithFlags(const char* name, int flags) {
   CHECK(name != NULL);
-  UniquePtr<File> file(new File);
+  std::unique_ptr<File> file(new File);
   if (!file->Open(name, flags, 0666)) {
     return NULL;
   }
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 9cf8785..3756435 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -25,7 +25,7 @@
 namespace art {
 
 ParsedOptions* ParsedOptions::Create(const Runtime::Options& options, bool ignore_unrecognized) {
-  UniquePtr<ParsedOptions> parsed(new ParsedOptions());
+  std::unique_ptr<ParsedOptions> parsed(new ParsedOptions());
   if (parsed->Parse(options, ignore_unrecognized)) {
     return parsed.release();
   }
@@ -181,8 +181,16 @@
   parallel_gc_threads_ = sysconf(_SC_NPROCESSORS_CONF) - 1;
   // Only the main GC thread, no workers.
   conc_gc_threads_ = 0;
-  // Default is CMS which is Sticky + Partial + Full CMS GC.
+  // The default GC type is set in makefiles.
+#if ART_DEFAULT_GC_TYPE_IS_CMS
   collector_type_ = gc::kCollectorTypeCMS;
+#elif ART_DEFAULT_GC_TYPE_IS_SS
+  collector_type_ = gc::kCollectorTypeSS;
+#elif ART_DEFAULT_GC_TYPE_IS_GSS
+  collector_type_ = gc::kCollectorTypeGSS;
+#else
+#error "ART default GC type must be set"
+#endif
   // If background_collector_type_ is kCollectorTypeNone, it defaults to the collector_type_ after
   // parsing options.
   background_collector_type_ = gc::kCollectorTypeNone;
@@ -223,15 +231,17 @@
 
 //  gLogVerbosity.class_linker = true;  // TODO: don't check this in!
 //  gLogVerbosity.compiler = true;  // TODO: don't check this in!
-//  gLogVerbosity.verifier = true;  // TODO: don't check this in!
-//  gLogVerbosity.heap = true;  // TODO: don't check this in!
 //  gLogVerbosity.gc = true;  // TODO: don't check this in!
+//  gLogVerbosity.heap = true;  // TODO: don't check this in!
 //  gLogVerbosity.jdwp = true;  // TODO: don't check this in!
 //  gLogVerbosity.jni = true;  // TODO: don't check this in!
 //  gLogVerbosity.monitor = true;  // TODO: don't check this in!
+//  gLogVerbosity.profiler = true;  // TODO: don't check this in!
+//  gLogVerbosity.signals = true;  // TODO: don't check this in!
 //  gLogVerbosity.startup = true;  // TODO: don't check this in!
 //  gLogVerbosity.third_party_jni = true;  // TODO: don't check this in!
 //  gLogVerbosity.threads = true;  // TODO: don't check this in!
+//  gLogVerbosity.verifier = true;  // TODO: don't check this in!
 
   method_trace_ = false;
   method_trace_file_ = "/data/method-trace-file.bin";
@@ -444,26 +454,30 @@
       for (size_t i = 0; i < verbose_options.size(); ++i) {
         if (verbose_options[i] == "class") {
           gLogVerbosity.class_linker = true;
-        } else if (verbose_options[i] == "verifier") {
-          gLogVerbosity.verifier = true;
         } else if (verbose_options[i] == "compiler") {
           gLogVerbosity.compiler = true;
-        } else if (verbose_options[i] == "heap") {
-          gLogVerbosity.heap = true;
         } else if (verbose_options[i] == "gc") {
           gLogVerbosity.gc = true;
+        } else if (verbose_options[i] == "heap") {
+          gLogVerbosity.heap = true;
         } else if (verbose_options[i] == "jdwp") {
           gLogVerbosity.jdwp = true;
         } else if (verbose_options[i] == "jni") {
           gLogVerbosity.jni = true;
         } else if (verbose_options[i] == "monitor") {
           gLogVerbosity.monitor = true;
+        } else if (verbose_options[i] == "profiler") {
+          gLogVerbosity.profiler = true;
+        } else if (verbose_options[i] == "signals") {
+          gLogVerbosity.signals = true;
         } else if (verbose_options[i] == "startup") {
           gLogVerbosity.startup = true;
         } else if (verbose_options[i] == "third-party-jni") {
           gLogVerbosity.third_party_jni = true;
         } else if (verbose_options[i] == "threads") {
           gLogVerbosity.threads = true;
+        } else if (verbose_options[i] == "verifier") {
+          gLogVerbosity.verifier = true;
         } else {
           Usage("Unknown -verbose option %s\n", verbose_options[i].c_str());
           return false;
@@ -519,8 +533,8 @@
       Trace::SetDefaultClockSource(kProfilerClockSourceWall);
     } else if (option == "-Xprofile:dualclock") {
       Trace::SetDefaultClockSource(kProfilerClockSourceDual);
-    } else if (StartsWith(option, "-Xprofile:")) {
-      if (!ParseStringAfterChar(option, ';', &profile_output_filename_)) {
+    } else if (StartsWith(option, "-Xprofile-filename:")) {
+      if (!ParseStringAfterChar(option, ':', &profile_output_filename_)) {
         return false;
       }
       profile_ = true;
@@ -660,10 +674,17 @@
   // the art specific version. This can happen with on device
   // boot.art/boot.oat generation by GenerateImage which relies on the
   // value of BOOTCLASSPATH.
+#if defined(ART_TARGET)
   std::string core_jar("/core.jar");
+  std::string core_libart_jar("/core-libart.jar");
+#else
+  // The host uses hostdex files.
+  std::string core_jar("/core-hostdex.jar");
+  std::string core_libart_jar("/core-libart-hostdex.jar");
+#endif
   size_t core_jar_pos = boot_class_path_string_.find(core_jar);
   if (core_jar_pos != std::string::npos) {
-    boot_class_path_string_.replace(core_jar_pos, core_jar.size(), "/core-libart.jar");
+    boot_class_path_string_.replace(core_jar_pos, core_jar.size(), core_libart_jar);
   }
 
   if (compiler_callbacks_ == nullptr && image_.empty()) {
@@ -765,11 +786,11 @@
   UsageMessage(stream, "  -Xmethod-trace\n");
   UsageMessage(stream, "  -Xmethod-trace-file:filename");
   UsageMessage(stream, "  -Xmethod-trace-file-size:integervalue\n");
-  UsageMessage(stream, "  -Xprofile=filename\n");
+  UsageMessage(stream, "  -Xprofile-filename:filename\n");
   UsageMessage(stream, "  -Xprofile-period:integervalue\n");
   UsageMessage(stream, "  -Xprofile-duration:integervalue\n");
   UsageMessage(stream, "  -Xprofile-interval:integervalue\n");
-  UsageMessage(stream, "  -Xprofile-backoff:integervalue\n");
+  UsageMessage(stream, "  -Xprofile-backoff:doublevalue\n");
   UsageMessage(stream, "  -Xcompiler-option dex2oat-option\n");
   UsageMessage(stream, "  -Ximage-compiler-option dex2oat-option\n");
   UsageMessage(stream, "\n");
@@ -862,11 +883,19 @@
   if (!ParseStringAfterChar(option, after_char, &substring)) {
     return false;
   }
-  std::istringstream iss(substring);
+  bool sane_val = true;
   double value;
-  iss >> value;
-  // Ensure that we have a value, there was no cruft after it and it satisfies a sensible range.
-  const bool sane_val = iss.eof() && (value >= min) && (value <= max);
+  if (false) {
+    // TODO: this doesn't seem to work on the emulator.  b/15114595
+    std::stringstream iss(substring);
+    iss >> value;
+    // Ensure that we have a value, there was no cruft after it and it satisfies a sensible range.
+    sane_val = iss.eof() && (value >= min) && (value <= max);
+  } else {
+    char* end = nullptr;
+    value = strtod(substring.c_str(), &end);
+    sane_val = *end == '\0' && value >= min && value <= max;
+  }
   if (!sane_val) {
     Usage("Invalid double value %s for option %s\n", substring.c_str(), option.c_str());
     return false;
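
[editor's note] The double-parsing change above sidesteps an istringstream problem on the emulator (b/15114595) by calling strtod directly. A self-contained sketch of the same validation, assuming only the C standard library; ParseBoundedDouble is a hypothetical name, not part of this change:

    #include <cstdlib>
    #include <string>

    static bool ParseBoundedDouble(const std::string& s, double min, double max, double* out) {
      char* end = nullptr;
      double value = strtod(s.c_str(), &end);
      // Reject empty input, trailing cruft, and out-of-range values.
      if (end == s.c_str() || *end != '\0' || value < min || value > max) {
        return false;
      }
      *out = value;
      return true;
    }
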
diff --git a/runtime/parsed_options_test.cc b/runtime/parsed_options_test.cc
index 7f293cd..b58a29c 100644
--- a/runtime/parsed_options_test.cc
+++ b/runtime/parsed_options_test.cc
@@ -16,7 +16,8 @@
 
 #include "parsed_options.h"
 
-#include "UniquePtr.h"
+#include <memory>
+
 #include "common_runtime_test.h"
 
 namespace art {
@@ -53,7 +54,7 @@
   options.push_back(std::make_pair("vfprintf", test_vfprintf));
   options.push_back(std::make_pair("abort", test_abort));
   options.push_back(std::make_pair("exit", test_exit));
-  UniquePtr<ParsedOptions> parsed(ParsedOptions::Create(options, false));
+  std::unique_ptr<ParsedOptions> parsed(ParsedOptions::Create(options, false));
   ASSERT_TRUE(parsed.get() != NULL);
 
   EXPECT_EQ(lib_core, parsed->boot_class_path_string_);
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 7b117f4..6e33f9d 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -147,7 +147,7 @@
 
       startup_delay = 0;
 
-      LOG(DEBUG) << "Delaying profile start for " << delay_secs << " secs";
+      VLOG(profiler) << "Delaying profile start for " << delay_secs << " secs";
       MutexLock mu(self, profiler->wait_lock_);
       profiler->period_condition_.TimedWait(self, delay_secs * 1000, 0);
 
@@ -167,7 +167,7 @@
     uint64_t end_us = start_us + profiler->duration_s_ * UINT64_C(1000000);
     uint64_t now_us = start_us;
 
-    LOG(DEBUG) << "Starting profiling run now for " << PrettyDuration((end_us - start_us) * 1000);
+    VLOG(profiler) << "Starting profiling run now for " << PrettyDuration((end_us - start_us) * 1000);
 
 
     SampleCheckpoint check_point(profiler);
@@ -221,7 +221,7 @@
       // After the profile has been taken, write it out.
       ScopedObjectAccess soa(self);   // Acquire the mutator lock.
       uint32_t size = profiler->WriteProfile();
-      LOG(DEBUG) << "Profile size: " << size;
+      VLOG(profiler) << "Profile size: " << size;
     }
   }
 
@@ -233,7 +233,7 @@
 // Write out the profile file if we are generating a profile.
 uint32_t BackgroundMethodSamplingProfiler::WriteProfile() {
   std::string full_name = profile_file_name_;
-  LOG(DEBUG) << "Saving profile to " << full_name;
+  VLOG(profiler) << "Saving profile to " << full_name;
 
   int fd = open(full_name.c_str(), O_RDWR);
   if (fd < 0) {
@@ -469,7 +469,7 @@
   num_null_methods_ += previous_num_null_methods_;
   num_boot_methods_ += previous_num_boot_methods_;
 
-  LOG(DEBUG) << "Profile: " << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_;
+  VLOG(profiler) << "Profile: " << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_;
   os << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_ << "\n";
   uint32_t num_methods = 0;
   for (int i = 0 ; i < kHashSize; i++) {
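
[editor's note] With the LOG(DEBUG)-to-VLOG(profiler) conversion above, profiler output is gated behind the new "profiler" verbose flag added in parsed_options.cc. A hedged sketch of enabling it programmatically; the option string follows the -verbose: parsing shown in this change, and Runtime::Options is the vector-of-pairs type from runtime.h:

    // Equivalent to passing -verbose:profiler on the command line.
    Runtime::Options options;
    options.push_back(std::make_pair("-verbose:profiler", nullptr));
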
diff --git a/runtime/profiler.h b/runtime/profiler.h
index 31fdc79..938fdb7 100644
--- a/runtime/profiler.h
+++ b/runtime/profiler.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_PROFILER_H_
 #define ART_RUNTIME_PROFILER_H_
 
+#include <memory>
 #include <ostream>
 #include <set>
 #include <string>
@@ -29,7 +30,6 @@
 #include "instrumentation.h"
 #include "os.h"
 #include "safe_map.h"
-#include "UniquePtr.h"
 
 namespace art {
 
@@ -177,7 +177,7 @@
 
   ProfileSampleResults profile_table_;
 
-  UniquePtr<Barrier> profiler_barrier_;
+  std::unique_ptr<Barrier> profiler_barrier_;
 
   // Set of methods to be filtered out.  This will probably be rare because
   // most of the methods we want to be filtered reside in the boot path and
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index eebfba8..f38fb21 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -107,37 +107,42 @@
 TEST_F(ProxyTest, ProxyClassHelper) {
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("Interfaces");
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
-                                            soa.Decode<mirror::ClassLoader*>(jclass_loader));
+  StackHandleScope<4> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
 
-  mirror::Class* I = class_linker_->FindClass(soa.Self(), "LInterfaces$I;", class_loader);
-  mirror::Class* J = class_linker_->FindClass(soa.Self(), "LInterfaces$J;", class_loader);
-  ASSERT_TRUE(I != nullptr);
-  ASSERT_TRUE(J != nullptr);
+  Handle<mirror::Class> I(hs.NewHandle(
+      class_linker_->FindClass(soa.Self(), "LInterfaces$I;", class_loader)));
+  Handle<mirror::Class> J(hs.NewHandle(
+      class_linker_->FindClass(soa.Self(), "LInterfaces$J;", class_loader)));
+  ASSERT_TRUE(I.Get() != nullptr);
+  ASSERT_TRUE(J.Get() != nullptr);
+
   std::vector<mirror::Class*> interfaces;
-  interfaces.push_back(I);
-  interfaces.push_back(J);
-
-  mirror::Class* proxyClass = GenerateProxyClass(soa, jclass_loader, "$Proxy1234", interfaces);
-  ASSERT_TRUE(proxyClass != nullptr);
-  ASSERT_TRUE(proxyClass->IsProxyClass());
-  ASSERT_TRUE(proxyClass->IsInitialized());
+  interfaces.push_back(I.Get());
+  interfaces.push_back(J.Get());
+  Handle<mirror::Class> proxy_class(hs.NewHandle(
+      GenerateProxyClass(soa, jclass_loader, "$Proxy1234", interfaces)));
+  interfaces.clear();  // Don't leave possibly stale objects in the array, as good practice.
+  ASSERT_TRUE(proxy_class.Get() != nullptr);
+  ASSERT_TRUE(proxy_class->IsProxyClass());
+  ASSERT_TRUE(proxy_class->IsInitialized());
 
   // Check ClassHelper for proxy.
-  ClassHelper kh(proxyClass);
-  EXPECT_EQ(kh.NumDirectInterfaces(), 2U);  // Interfaces$I and Interfaces$J.
-  EXPECT_EQ(I, kh.GetDirectInterface(0));
-  EXPECT_EQ(J, kh.GetDirectInterface(1));
-  std::string proxyClassDescriptor(kh.GetDescriptor());
-  EXPECT_EQ("L$Proxy1234;", proxyClassDescriptor);
+  EXPECT_EQ(proxy_class->NumDirectInterfaces(), 2U);  // Interfaces$I and Interfaces$J.
+  EXPECT_EQ(I.Get(), mirror::Class::GetDirectInterface(soa.Self(), proxy_class, 0));
+  EXPECT_EQ(J.Get(), mirror::Class::GetDirectInterface(soa.Self(), proxy_class, 1));
+  std::string proxy_class_descriptor(proxy_class->GetDescriptor());
+  EXPECT_STREQ("L$Proxy1234;", proxy_class_descriptor.c_str());
 }
 
 // Creates a proxy class and check FieldHelper works correctly.
 TEST_F(ProxyTest, ProxyFieldHelper) {
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("Interfaces");
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
-                                            soa.Decode<mirror::ClassLoader*>(jclass_loader));
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
 
   mirror::Class* I = class_linker_->FindClass(soa.Self(), "LInterfaces$I;", class_loader);
   mirror::Class* J = class_linker_->FindClass(soa.Self(), "LInterfaces$J;", class_loader);
diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc
index 8bd8dba..d8fc277 100644
--- a/runtime/quick/inline_method_analyser.cc
+++ b/runtime/quick/inline_method_analyser.cc
@@ -174,26 +174,24 @@
     return false;
   }
 
-  uint32_t return_reg = return_instruction->VRegA_11x();
+  int32_t return_reg = return_instruction->VRegA_11x();
   DCHECK_LT(return_reg, code_item->registers_size_);
 
-  uint32_t vA, vB, dummy;
-  uint64_t dummy_wide;
-  instruction->Decode(vA, vB, dummy_wide, dummy, nullptr);
+  int32_t const_value = instruction->VRegB();
   if (instruction->Opcode() == Instruction::CONST_HIGH16) {
-    vB <<= 16;
+    const_value <<= 16;
   }
-  DCHECK_LT(vA, code_item->registers_size_);
-  if (vA != return_reg) {
+  DCHECK_LT(instruction->VRegA(), code_item->registers_size_);
+  if (instruction->VRegA() != return_reg) {
     return false;  // Not returning the value set by const?
   }
-  if (return_opcode == Instruction::RETURN_OBJECT && vB != 0) {
+  if (return_opcode == Instruction::RETURN_OBJECT && const_value != 0) {
     return false;  // Returning non-null reference constant?
   }
   if (result != nullptr) {
     result->opcode = kInlineOpNonWideConst;
     result->flags = kInlineSpecial;
-    result->d.data = static_cast<uint64_t>(vB);
+    result->d.data = static_cast<uint64_t>(const_value);
   }
   return true;
 }
diff --git a/runtime/quick/quick_method_frame_info.h b/runtime/quick/quick_method_frame_info.h
new file mode 100644
index 0000000..684d4da
--- /dev/null
+++ b/runtime/quick/quick_method_frame_info.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_QUICK_QUICK_METHOD_FRAME_INFO_H_
+#define ART_RUNTIME_QUICK_QUICK_METHOD_FRAME_INFO_H_
+
+#include <stdint.h>
+
+#include "base/macros.h"
+
+namespace art {
+
+class PACKED(4) QuickMethodFrameInfo {
+ public:
+  constexpr QuickMethodFrameInfo()
+    : frame_size_in_bytes_(0u),
+      core_spill_mask_(0u),
+      fp_spill_mask_(0u) {
+  }
+
+  constexpr QuickMethodFrameInfo(uint32_t frame_size_in_bytes, uint32_t core_spill_mask,
+                                 uint32_t fp_spill_mask)
+    : frame_size_in_bytes_(frame_size_in_bytes),
+      core_spill_mask_(core_spill_mask),
+      fp_spill_mask_(fp_spill_mask) {
+  }
+
+  uint32_t FrameSizeInBytes() const {
+    return frame_size_in_bytes_;
+  }
+
+  uint32_t CoreSpillMask() const {
+    return core_spill_mask_;
+  }
+
+  uint32_t FpSpillMask() const {
+    return fp_spill_mask_;
+  }
+
+ private:
+  uint32_t frame_size_in_bytes_;
+  uint32_t core_spill_mask_;
+  uint32_t fp_spill_mask_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_QUICK_QUICK_METHOD_FRAME_INFO_H_
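
[editor's note] The new header defines a small value type bundling the three frame-layout facts that previously lived as separate ArtMethod fields. A hedged usage sketch; the concrete spill-mask values below are made up for illustration, not real ABI constants:

    #include "quick/quick_method_frame_info.h"

    art::QuickMethodFrameInfo frame(/*frame_size_in_bytes=*/64u,
                                    /*core_spill_mask=*/(1u << 5) | (1u << 6),
                                    /*fp_spill_mask=*/0u);
    uint32_t size = frame.FrameSizeInBytes();   // 64
    uint32_t cores = frame.CoreSpillMask();     // bits 5 and 6 set
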
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index aee0d64..8300195 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -20,7 +20,7 @@
 #include "deoptimize_stack_visitor.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "mirror/art_method-inl.h"
-#include "sirt_ref-inl.h"
+#include "handle_scope-inl.h"
 
 namespace art {
 
@@ -35,10 +35,11 @@
 void QuickExceptionHandler::FindCatch(const ThrowLocation& throw_location,
                                       mirror::Throwable* exception) {
   DCHECK(!is_deoptimization_);
-  SirtRef<mirror::Throwable> exception_ref(self_, exception);
+  StackHandleScope<1> hs(self_);
+  Handle<mirror::Throwable> exception_ref(hs.NewHandle(exception));
 
   // Walk the stack to find catch handler or prepare for deoptimization.
-  CatchBlockStackVisitor visitor(self_, context_, exception_ref, this);
+  CatchBlockStackVisitor visitor(self_, context_, &exception_ref, this);
   visitor.WalkStack(true);
 
   mirror::ArtMethod* catch_method = *handler_quick_frame_;
@@ -56,13 +57,13 @@
     DCHECK(!self_->IsExceptionPending());
   } else {
     // Put exception back in root set with clear throw location.
-    self_->SetException(ThrowLocation(), exception_ref.get());
+    self_->SetException(ThrowLocation(), exception_ref.Get());
   }
   // The debugger may suspend this thread and walk its stack. Let's do this before popping
   // instrumentation frames.
   instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
   instrumentation->ExceptionCaughtEvent(self_, throw_location, catch_method, handler_dex_pc_,
-                                        exception_ref.get());
+                                        exception_ref.Get());
 }
 
 void QuickExceptionHandler::DeoptimizeStack() {
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index ea2f830..4302c9e 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -23,17 +23,18 @@
 
 namespace art {
 
-template <typename MirrorType, bool kDoReadBarrier>
+template <typename MirrorType, ReadBarrierOption kReadBarrierOption>
 inline MirrorType* ReadBarrier::Barrier(
     mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) {
   // Unused for now.
   UNUSED(obj);
   UNUSED(offset);
   UNUSED(ref_addr);
-  if (kDoReadBarrier && kUseBakerReadBarrier) {
+  const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
+  if (with_read_barrier && kUseBakerReadBarrier) {
     // To be implemented.
     return ref_addr->AsMirrorPtr();
-  } else if (kDoReadBarrier && kUseBrooksReadBarrier) {
+  } else if (with_read_barrier && kUseBrooksReadBarrier) {
     // To be implemented.
     return ref_addr->AsMirrorPtr();
   } else {
@@ -42,6 +43,21 @@
   }
 }
 
+template <typename MirrorType, ReadBarrierOption kReadBarrierOption>
+inline MirrorType* ReadBarrier::BarrierForWeakRoot(MirrorType* ref) {
+  UNUSED(ref);
+  const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
+  if (with_read_barrier && kUseBakerReadBarrier) {
+    // To be implemented.
+    return ref;
+  } else if (with_read_barrier && kUseBrooksReadBarrier) {
+    // To be implemented.
+    return ref;
+  } else {
+    return ref;
+  }
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_READ_BARRIER_INL_H_
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index 6f59004..e40e8ea 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -33,10 +33,14 @@
 
 class ReadBarrier {
  public:
-  template <typename MirrorType, bool kDoReadBarrier = true>
+  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE static MirrorType* Barrier(
       mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  ALWAYS_INLINE static MirrorType* BarrierForWeakRoot(MirrorType* ref)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 };
 
 }  // namespace art
diff --git a/compiler/dex/bit_vector_block_iterator.cc b/runtime/read_barrier_option.h
similarity index 66%
copy from compiler/dex/bit_vector_block_iterator.cc
copy to runtime/read_barrier_option.h
index 32d7d71..7de8b8a 100644
--- a/compiler/dex/bit_vector_block_iterator.cc
+++ b/runtime/read_barrier_option.h
@@ -14,19 +14,16 @@
  * limitations under the License.
  */
 
-#include "bit_vector_block_iterator.h"
-#include "mir_graph.h"
-
+#ifndef ART_RUNTIME_READ_BARRIER_OPTION_H_
+#define ART_RUNTIME_READ_BARRIER_OPTION_H_
 namespace art {
 
-BasicBlock* BitVectorBlockIterator::Next() {
-  int idx = internal_iterator_.Next();
-
-  if (idx == -1) {
-    return nullptr;
-  }
-
-  return mir_graph_->GetBasicBlock(idx);
-}
+// Options for performing a read barrier or not.
+enum ReadBarrierOption {
+  kWithReadBarrier,     // Perform a read barrier.
+  kWithoutReadBarrier,  // Don't perform a read barrier.
+};
 
 }  // namespace art
+
+#endif  // ART_RUNTIME_READ_BARRIER_OPTION_H_
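
[editor's note] The boolean template parameter on ReadBarrier::Barrier is replaced by this two-value enum, which makes call sites self-describing. A hedged before/after sketch; obj, offset, ref_addr and raw_weak are placeholders for values a real caller would already have:

    // Before: art::ReadBarrier::Barrier<mirror::Object, false>(obj, offset, ref_addr);
    // After: the option is named at the call site.
    mirror::Object* ref =
        art::ReadBarrier::Barrier<mirror::Object, art::kWithoutReadBarrier>(obj, offset, ref_addr);
    // Weak roots use the new entry point added in read_barrier-inl.h.
    mirror::Object* weak =
        art::ReadBarrier::BarrierForWeakRoot<mirror::Object, art::kWithReadBarrier>(raw_weak);
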
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index f0ba003..c08cc30 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -100,7 +100,8 @@
     AppendWide(jv.j);
   }
 
-  void BuildArgArrayFromVarArgs(const ScopedObjectAccess& soa, mirror::Object* receiver, va_list ap)
+  void BuildArgArrayFromVarArgs(const ScopedObjectAccessAlreadyRunnable& soa,
+                                mirror::Object* receiver, va_list ap)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Set receiver if non-null (method is not static)
     if (receiver != nullptr) {
@@ -135,8 +136,8 @@
     }
   }
 
-  void BuildArgArrayFromJValues(const ScopedObjectAccessUnchecked& soa, mirror::Object* receiver,
-                                jvalue* args)
+  void BuildArgArrayFromJValues(const ScopedObjectAccessAlreadyRunnable& soa,
+                                mirror::Object* receiver, jvalue* args)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Set receiver if non-null (method is not static)
     if (receiver != nullptr) {
@@ -217,7 +218,8 @@
                      PrettyDescriptor(found_descriptor.as_string()).c_str()).c_str());
   }
 
-  bool BuildArgArrayFromObjectArray(const ScopedObjectAccess& soa, mirror::Object* receiver,
+  bool BuildArgArrayFromObjectArray(const ScopedObjectAccessAlreadyRunnable& soa,
+                                    mirror::Object* receiver,
                                     mirror::ObjectArray<mirror::Object>* args, MethodHelper& mh)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const DexFile::TypeList* classes = mh.GetParameterTypeList();
@@ -242,22 +244,21 @@
       }
 
 #define DO_FIRST_ARG(match_descriptor, get_fn, append) { \
-          const StringPiece src_descriptor(arg != nullptr \
-              ? ClassHelper(arg->GetClass<>()).GetDescriptor() \
-              : "null"); \
-          if (LIKELY(src_descriptor == match_descriptor)) { \
+          if (LIKELY(arg != nullptr && arg->GetClass<>()->DescriptorEquals(match_descriptor))) { \
             mirror::ArtField* primitive_field = arg->GetClass()->GetIFields()->Get(0); \
             append(primitive_field-> get_fn(arg));
 
 #define DO_ARG(match_descriptor, get_fn, append) \
-          } else if (LIKELY(src_descriptor == match_descriptor)) { \
+          } else if (LIKELY(arg != nullptr && \
+                            arg->GetClass<>()->DescriptorEquals(match_descriptor))) { \
             mirror::ArtField* primitive_field = arg->GetClass()->GetIFields()->Get(0); \
             append(primitive_field-> get_fn(arg));
 
 #define DO_FAIL(expected) \
           } else { \
             if (arg->GetClass<>()->IsPrimitive()) { \
-              ThrowIllegalPrimitiveArgumentException(expected, src_descriptor); \
+              ThrowIllegalPrimitiveArgumentException(expected, \
+                                                     arg->GetClass<>()->GetDescriptor().c_str()); \
             } else { \
               ThrowIllegalArgumentException(nullptr, \
                   StringPrintf("method %s argument %zd has type %s, got %s", \
@@ -343,7 +344,7 @@
   uint32_t num_bytes_;
   uint32_t* arg_array_;
   uint32_t small_arg_array_[kSmallArgArraySize];
-  UniquePtr<uint32_t[]> large_arg_array_;
+  std::unique_ptr<uint32_t[]> large_arg_array_;
 };
 
 static void CheckMethodArguments(mirror::ArtMethod* m, uint32_t* args)
@@ -397,8 +398,9 @@
 }
 
 
-static void InvokeWithArgArray(const ScopedObjectAccessUnchecked& soa, mirror::ArtMethod* method,
-                               ArgArray* arg_array, JValue* result, const char* shorty)
+static void InvokeWithArgArray(const ScopedObjectAccessAlreadyRunnable& soa,
+                               mirror::ArtMethod* method, ArgArray* arg_array, JValue* result,
+                               const char* shorty)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   uint32_t* args = arg_array->GetArray();
   if (UNLIKELY(soa.Env()->check_jni)) {
@@ -407,7 +409,8 @@
   method->Invoke(soa.Self(), args, arg_array->GetNumBytes(), result, shorty);
 }
 
-JValue InvokeWithVarArgs(const ScopedObjectAccess& soa, jobject obj, jmethodID mid, va_list args)
+JValue InvokeWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa, jobject obj, jmethodID mid,
+                         va_list args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtMethod* method = soa.DecodeMethod(mid);
   mirror::Object* receiver = method->IsStatic() ? nullptr : soa.Decode<mirror::Object*>(obj);
@@ -419,7 +422,7 @@
   return result;
 }
 
-JValue InvokeWithJValues(const ScopedObjectAccessUnchecked& soa, mirror::Object* receiver,
+JValue InvokeWithJValues(const ScopedObjectAccessAlreadyRunnable& soa, mirror::Object* receiver,
                          jmethodID mid, jvalue* args) {
   mirror::ArtMethod* method = soa.DecodeMethod(mid);
   MethodHelper mh(method);
@@ -430,7 +433,7 @@
   return result;
 }
 
-JValue InvokeVirtualOrInterfaceWithJValues(const ScopedObjectAccess& soa,
+JValue InvokeVirtualOrInterfaceWithJValues(const ScopedObjectAccessAlreadyRunnable& soa,
                                            mirror::Object* receiver, jmethodID mid, jvalue* args) {
   mirror::ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
   MethodHelper mh(method);
@@ -441,7 +444,7 @@
   return result;
 }
 
-JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccess& soa,
+JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa,
                                            jobject obj, jmethodID mid, va_list args) {
   mirror::Object* receiver = soa.Decode<mirror::Object*>(obj);
   mirror::ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
@@ -461,17 +464,18 @@
                                     mh.GetShorty());
 }
 
-jobject InvokeMethod(const ScopedObjectAccess& soa, jobject javaMethod,
+jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject javaMethod,
                      jobject javaReceiver, jobject javaArgs, bool accessible) {
   mirror::ArtMethod* m = mirror::ArtMethod::FromReflectedMethod(soa, javaMethod);
 
   mirror::Class* declaring_class = m->GetDeclaringClass();
   if (UNLIKELY(!declaring_class->IsInitialized())) {
-    SirtRef<mirror::Class> sirt_c(soa.Self(), declaring_class);
-    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(sirt_c, true, true)) {
+    StackHandleScope<1> hs(soa.Self());
+    Handle<mirror::Class> h_class(hs.NewHandle(declaring_class));
+    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(h_class, true, true)) {
       return nullptr;
     }
-    declaring_class = sirt_c.get();
+    declaring_class = h_class.Get();
   }
 
   mirror::Object* receiver = nullptr;
@@ -741,32 +745,32 @@
   }
 
   JValue boxed_value;
-  const StringPiece src_descriptor(ClassHelper(o->GetClass()).GetDescriptor());
+  mirror::Class* klass = o->GetClass();
   mirror::Class* src_class = nullptr;
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   mirror::ArtField* primitive_field = o->GetClass()->GetIFields()->Get(0);
-  if (src_descriptor == "Ljava/lang/Boolean;") {
+  if (klass->DescriptorEquals("Ljava/lang/Boolean;")) {
     src_class = class_linker->FindPrimitiveClass('Z');
     boxed_value.SetZ(primitive_field->GetBoolean(o));
-  } else if (src_descriptor == "Ljava/lang/Byte;") {
+  } else if (klass->DescriptorEquals("Ljava/lang/Byte;")) {
     src_class = class_linker->FindPrimitiveClass('B');
     boxed_value.SetB(primitive_field->GetByte(o));
-  } else if (src_descriptor == "Ljava/lang/Character;") {
+  } else if (klass->DescriptorEquals("Ljava/lang/Character;")) {
     src_class = class_linker->FindPrimitiveClass('C');
     boxed_value.SetC(primitive_field->GetChar(o));
-  } else if (src_descriptor == "Ljava/lang/Float;") {
+  } else if (klass->DescriptorEquals("Ljava/lang/Float;")) {
     src_class = class_linker->FindPrimitiveClass('F');
     boxed_value.SetF(primitive_field->GetFloat(o));
-  } else if (src_descriptor == "Ljava/lang/Double;") {
+  } else if (klass->DescriptorEquals("Ljava/lang/Double;")) {
     src_class = class_linker->FindPrimitiveClass('D');
     boxed_value.SetD(primitive_field->GetDouble(o));
-  } else if (src_descriptor == "Ljava/lang/Integer;") {
+  } else if (klass->DescriptorEquals("Ljava/lang/Integer;")) {
     src_class = class_linker->FindPrimitiveClass('I');
     boxed_value.SetI(primitive_field->GetInt(o));
-  } else if (src_descriptor == "Ljava/lang/Long;") {
+  } else if (klass->DescriptorEquals("Ljava/lang/Long;")) {
     src_class = class_linker->FindPrimitiveClass('J');
     boxed_value.SetJ(primitive_field->GetLong(o));
-  } else if (src_descriptor == "Ljava/lang/Short;") {
+  } else if (klass->DescriptorEquals("Ljava/lang/Short;")) {
     src_class = class_linker->FindPrimitiveClass('S');
     boxed_value.SetS(primitive_field->GetShort(o));
   } else {
@@ -774,7 +778,7 @@
                                   StringPrintf("%s has type %s, got %s",
                                                UnboxingFailureKind(f).c_str(),
                                                PrettyDescriptor(dst_class).c_str(),
-                                               PrettyDescriptor(src_descriptor.data()).c_str()).c_str());
+                                               PrettyDescriptor(o->GetClass()->GetDescriptor()).c_str()).c_str());
     return false;
   }
 
diff --git a/runtime/reflection.h b/runtime/reflection.h
index d9a7228..2c54c06 100644
--- a/runtime/reflection.h
+++ b/runtime/reflection.h
@@ -29,8 +29,7 @@
 }  // namespace mirror
 union JValue;
 class MethodHelper;
-class ScopedObjectAccess;
-class ScopedObjectAccessUnchecked;
+class ScopedObjectAccessAlreadyRunnable;
 class ShadowFrame;
 class ThrowLocation;
 
@@ -48,18 +47,19 @@
                            const JValue& src, JValue* dst)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-JValue InvokeWithVarArgs(const ScopedObjectAccess& soa, jobject obj, jmethodID mid, va_list args)
+JValue InvokeWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa, jobject obj, jmethodID mid,
+                         va_list args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-JValue InvokeWithJValues(const ScopedObjectAccessUnchecked& soa, mirror::Object* receiver,
+JValue InvokeWithJValues(const ScopedObjectAccessAlreadyRunnable& soa, mirror::Object* receiver,
                          jmethodID mid, jvalue* args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-JValue InvokeVirtualOrInterfaceWithJValues(const ScopedObjectAccess& soa,
+JValue InvokeVirtualOrInterfaceWithJValues(const ScopedObjectAccessAlreadyRunnable& soa,
                                            mirror::Object* receiver, jmethodID mid, jvalue* args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccess& soa,
+JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa,
                                            jobject obj, jmethodID mid, va_list args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -67,7 +67,7 @@
                            MethodHelper& mh, JValue* result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-jobject InvokeMethod(const ScopedObjectAccess& soa, jobject method, jobject receiver,
+jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject method, jobject receiver,
                      jobject args, bool accessible)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/reflection_test.cc b/runtime/reflection_test.cc
index f7fc020..3b66abe 100644
--- a/runtime/reflection_test.cc
+++ b/runtime/reflection_test.cc
@@ -87,10 +87,10 @@
     const char* class_name = is_static ? "StaticLeafMethods" : "NonStaticLeafMethods";
     jobject jclass_loader(LoadDex(class_name));
     Thread* self = Thread::Current();
-    SirtRef<mirror::ClassLoader> null_class_loader(self, nullptr);
-    SirtRef<mirror::ClassLoader>
-        class_loader(self,
-                     ScopedObjectAccessUnchecked(self).Decode<mirror::ClassLoader*>(jclass_loader));
+    StackHandleScope<2> hs(self);
+    Handle<mirror::ClassLoader> class_loader(
+        hs.NewHandle(
+            ScopedObjectAccessUnchecked(self).Decode<mirror::ClassLoader*>(jclass_loader)));
     if (is_static) {
       MakeExecutable(ScopedObjectAccessUnchecked(self).Decode<mirror::ClassLoader*>(jclass_loader),
                      class_name);
@@ -485,8 +485,9 @@
   TEST_DISABLED_FOR_PORTABLE();
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("Main");
-  SirtRef<mirror::ClassLoader>
-      class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(jclass_loader));
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
   CompileDirectMethod(class_loader, "Main", "main", "([Ljava/lang/String;)V");
 
   mirror::Class* klass = class_linker_->FindClass(soa.Self(), "LMain;", class_loader);
diff --git a/runtime/runtime-inl.h b/runtime/runtime-inl.h
new file mode 100644
index 0000000..29ddd1d
--- /dev/null
+++ b/runtime/runtime-inl.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_RUNTIME_INL_H_
+#define ART_RUNTIME_RUNTIME_INL_H_
+
+#include "runtime.h"
+
+namespace art {
+
+inline QuickMethodFrameInfo Runtime::GetRuntimeMethodFrameInfo(mirror::ArtMethod* method) const {
+  DCHECK(method != nullptr);
+  // Cannot be imt-conflict-method or resolution-method.
+  DCHECK(method != GetImtConflictMethod());
+  DCHECK(method != GetResolutionMethod());
+  // Don't use GetCalleeSaveMethod(), some tests don't set all callee save methods.
+  if (method == callee_save_methods_[Runtime::kRefsAndArgs]) {
+    return GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+  } else if (method == callee_save_methods_[Runtime::kSaveAll]) {
+    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveAll);
+  } else {
+    DCHECK(method == callee_save_methods_[Runtime::kRefsOnly]);
+    return GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly);
+  }
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_RUNTIME_INL_H_
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index fbc0460..dcbf42d 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -27,13 +27,19 @@
 #include <cstdio>
 #include <cstdlib>
 #include <limits>
+#include <memory>
 #include <vector>
 #include <fcntl.h>
 
+#include "arch/arm/quick_method_frame_info_arm.h"
 #include "arch/arm/registers_arm.h"
+#include "arch/arm64/quick_method_frame_info_arm64.h"
 #include "arch/arm64/registers_arm64.h"
+#include "arch/mips/quick_method_frame_info_mips.h"
 #include "arch/mips/registers_mips.h"
+#include "arch/x86/quick_method_frame_info_x86.h"
 #include "arch/x86/registers_x86.h"
+#include "arch/x86_64/quick_method_frame_info_x86_64.h"
 #include "arch/x86_64/registers_x86_64.h"
 #include "atomic.h"
 #include "class_linker.h"
@@ -55,18 +61,18 @@
 #include "monitor.h"
 #include "parsed_options.h"
 #include "oat_file.h"
+#include "quick/quick_method_frame_info.h"
 #include "reflection.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
 #include "signal_catcher.h"
 #include "signal_set.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 #include "thread.h"
 #include "thread_list.h"
 #include "trace.h"
 #include "transaction.h"
 #include "profiler.h"
-#include "UniquePtr.h"
 #include "verifier/method_verifier.h"
 #include "well_known_classes.h"
 
@@ -88,6 +94,7 @@
       resolution_method_(nullptr),
       imt_conflict_method_(nullptr),
       default_imt_(nullptr),
+      instruction_set_(kNone),
       compiler_callbacks_(nullptr),
       is_zygote_(false),
       is_concurrent_gc_enabled_(true),
@@ -160,7 +167,7 @@
   Trace::Shutdown();
 
   // Make sure to let the GC complete if it is running.
-  heap_->WaitForGcToComplete(self);
+  heap_->WaitForGcToComplete(gc::kGcCauseBackground, self);
   heap_->DeleteThreadPool();
 
   // Make sure our internal threads are dead before we start tearing down things they're using.
@@ -202,6 +209,8 @@
     Thread* self = Thread::Current();
     if (self == nullptr) {
       os << "(Aborting thread was not attached to runtime!)\n";
+      DumpKernelStack(os, GetTid(), "  kernel: ", false);
+      DumpNativeStack(os, GetTid(), "  native: ", nullptr);
     } else {
       os << "Aborting thread:\n";
       if (Locks::mutator_lock_->IsExclusiveHeld(self) || Locks::mutator_lock_->IsSharedHeld(self)) {
@@ -322,8 +331,9 @@
   ScopedObjectAccess soa(Thread::Current());
   ClassLinker* cl = Runtime::Current()->GetClassLinker();
 
-  SirtRef<mirror::Class> class_loader_class(
-      soa.Self(), soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_ClassLoader));
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::Class> class_loader_class(
+      hs.NewHandle(soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_ClassLoader)));
   CHECK(cl->EnsureInitialized(class_loader_class, true, true));
 
   mirror::ArtMethod* getSystemClassLoader =
@@ -331,19 +341,18 @@
   CHECK(getSystemClassLoader != NULL);
 
   JValue result = InvokeWithJValues(soa, nullptr, soa.EncodeMethod(getSystemClassLoader), nullptr);
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
-                                            down_cast<mirror::ClassLoader*>(result.GetL()));
-  CHECK(class_loader.get() != nullptr);
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(down_cast<mirror::ClassLoader*>(result.GetL())));
+  CHECK(class_loader.Get() != nullptr);
   JNIEnv* env = soa.Self()->GetJniEnv();
   ScopedLocalRef<jobject> system_class_loader(env,
-                                              soa.AddLocalReference<jobject>(class_loader.get()));
+                                              soa.AddLocalReference<jobject>(class_loader.Get()));
   CHECK(system_class_loader.get() != nullptr);
 
-  soa.Self()->SetClassLoaderOverride(class_loader.get());
+  soa.Self()->SetClassLoaderOverride(class_loader.Get());
 
-  SirtRef<mirror::Class> thread_class(
-      soa.Self(),
-      soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_Thread));
+  Handle<mirror::Class> thread_class(
+      hs.NewHandle(soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_Thread)));
   CHECK(cl->EnsureInitialized(thread_class, true, true));
 
   mirror::ArtField* contextClassLoader =
@@ -351,7 +360,7 @@
   CHECK(contextClassLoader != NULL);
 
   // We can't run in a transaction yet.
-  contextClassLoader->SetObject<false>(soa.Self()->GetPeer(), class_loader.get());
+  contextClassLoader->SetObject<false>(soa.Self()->GetPeer(), class_loader.Get());
 
   return env->NewGlobalRef(system_class_loader.get());
 }
@@ -386,7 +395,10 @@
 
   system_class_loader_ = CreateSystemClassLoader();
 
-  self->GetJniEnv()->locals.AssertEmpty();
+  {
+    ScopedObjectAccess soa(self);
+    self->GetJniEnv()->locals.AssertEmpty();
+  }
 
   VLOG(startup) << "Runtime::Start exiting";
 
@@ -493,7 +505,7 @@
 bool Runtime::Init(const Options& raw_options, bool ignore_unrecognized) {
   CHECK_EQ(sysconf(_SC_PAGE_SIZE), kPageSize);
 
-  UniquePtr<ParsedOptions> options(ParsedOptions::Create(raw_options, ignore_unrecognized));
+  std::unique_ptr<ParsedOptions> options(ParsedOptions::Create(raw_options, ignore_unrecognized));
   if (options.get() == NULL) {
     LOG(ERROR) << "Failed to parse options";
     return false;
@@ -610,6 +622,9 @@
   class_linker_ = new ClassLinker(intern_table_);
   if (GetHeap()->HasImageSpace()) {
     class_linker_->InitFromImage();
+    if (kIsDebugBuild) {
+      GetHeap()->GetImageSpace()->VerifyImageAllocations();
+    }
   } else {
     CHECK(options->boot_class_path_ != NULL);
     CHECK_NE(options->boot_class_path_->size(), 0U);
@@ -673,7 +688,8 @@
     std::string mapped_name(StringPrintf(OS_SHARED_LIB_FORMAT_STR, "javacore"));
     std::string reason;
     self->TransitionFromSuspendedToRunnable();
-    SirtRef<mirror::ClassLoader> class_loader(self, nullptr);
+    StackHandleScope<1> hs(self);
+    auto class_loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
     if (!instance_->java_vm_->LoadNativeLibrary(mapped_name, class_loader, &reason)) {
       LOG(FATAL) << "LoadNativeLibrary failed for \"" << mapped_name << "\": " << reason;
     }
@@ -733,6 +749,7 @@
   REGISTER(register_java_lang_System);
   REGISTER(register_java_lang_Thread);
   REGISTER(register_java_lang_VMClassLoader);
+  REGISTER(register_java_lang_ref_Reference);
   REGISTER(register_java_lang_reflect_Array);
   REGISTER(register_java_lang_reflect_Constructor);
   REGISTER(register_java_lang_reflect_Field);
@@ -928,25 +945,28 @@
 }
 
 void Runtime::VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
-  VisitConcurrentRoots(callback, arg, flags);
   VisitNonConcurrentRoots(callback, arg);
+  VisitConcurrentRoots(callback, arg, flags);
 }
 
 mirror::ObjectArray<mirror::ArtMethod>* Runtime::CreateDefaultImt(ClassLinker* cl) {
   Thread* self = Thread::Current();
-  SirtRef<mirror::ObjectArray<mirror::ArtMethod> > imtable(self, cl->AllocArtMethodArray(self, 64));
+  StackHandleScope<1> hs(self);
+  Handle<mirror::ObjectArray<mirror::ArtMethod>> imtable(
+      hs.NewHandle(cl->AllocArtMethodArray(self, 64)));
   mirror::ArtMethod* imt_conflict_method = Runtime::Current()->GetImtConflictMethod();
   for (size_t i = 0; i < static_cast<size_t>(imtable->GetLength()); i++) {
     imtable->Set<false>(i, imt_conflict_method);
   }
-  return imtable.get();
+  return imtable.Get();
 }
 
 mirror::ArtMethod* Runtime::CreateImtConflictMethod() {
   Thread* self = Thread::Current();
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
-  SirtRef<mirror::ArtMethod> method(self, class_linker->AllocArtMethod(self));
+  StackHandleScope<1> hs(self);
+  Handle<mirror::ArtMethod> method(hs.NewHandle(class_linker->AllocArtMethod(self)));
   method->SetDeclaringClass(mirror::ArtMethod::GetJavaLangReflectArtMethod());
   // TODO: use a special method for imt conflict method saves.
   method->SetDexMethodIndex(DexFile::kDexNoIndex);
@@ -958,14 +978,15 @@
     method->SetEntryPointFromPortableCompiledCode(GetPortableImtConflictTrampoline(class_linker));
     method->SetEntryPointFromQuickCompiledCode(GetQuickImtConflictTrampoline(class_linker));
   }
-  return method.get();
+  return method.Get();
 }
 
 mirror::ArtMethod* Runtime::CreateResolutionMethod() {
   Thread* self = Thread::Current();
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
-  SirtRef<mirror::ArtMethod> method(self, class_linker->AllocArtMethod(self));
+  StackHandleScope<1> hs(self);
+  Handle<mirror::ArtMethod> method(hs.NewHandle(class_linker->AllocArtMethod(self)));
   method->SetDeclaringClass(mirror::ArtMethod::GetJavaLangReflectArtMethod());
   // TODO: use a special method for resolution method saves
   method->SetDexMethodIndex(DexFile::kDexNoIndex);
@@ -977,133 +998,22 @@
     method->SetEntryPointFromPortableCompiledCode(GetPortableResolutionTrampoline(class_linker));
     method->SetEntryPointFromQuickCompiledCode(GetQuickResolutionTrampoline(class_linker));
   }
-  return method.get();
+  return method.Get();
 }
 
-mirror::ArtMethod* Runtime::CreateCalleeSaveMethod(InstructionSet instruction_set,
-                                                   CalleeSaveType type) {
+mirror::ArtMethod* Runtime::CreateCalleeSaveMethod(CalleeSaveType type) {
   Thread* self = Thread::Current();
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
-  SirtRef<mirror::ArtMethod> method(self, class_linker->AllocArtMethod(self));
+  StackHandleScope<1> hs(self);
+  Handle<mirror::ArtMethod> method(hs.NewHandle(class_linker->AllocArtMethod(self)));
   method->SetDeclaringClass(mirror::ArtMethod::GetJavaLangReflectArtMethod());
   // TODO: use a special method for callee saves
   method->SetDexMethodIndex(DexFile::kDexNoIndex);
   method->SetEntryPointFromPortableCompiledCode(nullptr);
   method->SetEntryPointFromQuickCompiledCode(nullptr);
-  if ((instruction_set == kThumb2) || (instruction_set == kArm)) {
-    uint32_t ref_spills = (1 << art::arm::R5) | (1 << art::arm::R6)  | (1 << art::arm::R7) |
-                          (1 << art::arm::R8) | (1 << art::arm::R10) | (1 << art::arm::R11);
-    uint32_t arg_spills = (1 << art::arm::R1) | (1 << art::arm::R2) | (1 << art::arm::R3);
-    uint32_t all_spills = (1 << art::arm::R4) | (1 << art::arm::R9);
-    uint32_t core_spills = ref_spills | (type == kRefsAndArgs ? arg_spills : 0) |
-                           (type == kSaveAll ? all_spills : 0) | (1 << art::arm::LR);
-    uint32_t fp_all_spills = (1 << art::arm::S0)  | (1 << art::arm::S1)  | (1 << art::arm::S2) |
-                             (1 << art::arm::S3)  | (1 << art::arm::S4)  | (1 << art::arm::S5) |
-                             (1 << art::arm::S6)  | (1 << art::arm::S7)  | (1 << art::arm::S8) |
-                             (1 << art::arm::S9)  | (1 << art::arm::S10) | (1 << art::arm::S11) |
-                             (1 << art::arm::S12) | (1 << art::arm::S13) | (1 << art::arm::S14) |
-                             (1 << art::arm::S15) | (1 << art::arm::S16) | (1 << art::arm::S17) |
-                             (1 << art::arm::S18) | (1 << art::arm::S19) | (1 << art::arm::S20) |
-                             (1 << art::arm::S21) | (1 << art::arm::S22) | (1 << art::arm::S23) |
-                             (1 << art::arm::S24) | (1 << art::arm::S25) | (1 << art::arm::S26) |
-                             (1 << art::arm::S27) | (1 << art::arm::S28) | (1 << art::arm::S29) |
-                             (1 << art::arm::S30) | (1 << art::arm::S31);
-    uint32_t fp_spills = type == kSaveAll ? fp_all_spills : 0;
-    size_t frame_size = RoundUp((POPCOUNT(core_spills) /* gprs */ +
-                                 POPCOUNT(fp_spills) /* fprs */ +
-                                 1 /* Method* */) * kArmPointerSize, kStackAlignment);
-    method->SetFrameSizeInBytes(frame_size);
-    method->SetCoreSpillMask(core_spills);
-    method->SetFpSpillMask(fp_spills);
-  } else if (instruction_set == kMips) {
-    uint32_t ref_spills = (1 << art::mips::S2) | (1 << art::mips::S3) | (1 << art::mips::S4) |
-                          (1 << art::mips::S5) | (1 << art::mips::S6) | (1 << art::mips::S7) |
-                          (1 << art::mips::GP) | (1 << art::mips::FP);
-    uint32_t arg_spills = (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3);
-    uint32_t all_spills = (1 << art::mips::S0) | (1 << art::mips::S1);
-    uint32_t core_spills = ref_spills | (type == kRefsAndArgs ? arg_spills : 0) |
-                           (type == kSaveAll ? all_spills : 0) | (1 << art::mips::RA);
-    size_t frame_size = RoundUp((POPCOUNT(core_spills) /* gprs */ +
-                                (type == kRefsAndArgs ? 0 : 3) + 1 /* Method* */) *
-                                kMipsPointerSize, kStackAlignment);
-    method->SetFrameSizeInBytes(frame_size);
-    method->SetCoreSpillMask(core_spills);
-    method->SetFpSpillMask(0);
-  } else if (instruction_set == kX86) {
-    uint32_t ref_spills = (1 << art::x86::EBP) | (1 << art::x86::ESI) | (1 << art::x86::EDI);
-    uint32_t arg_spills = (1 << art::x86::ECX) | (1 << art::x86::EDX) | (1 << art::x86::EBX);
-    uint32_t core_spills = ref_spills | (type == kRefsAndArgs ? arg_spills : 0) |
-                         (1 << art::x86::kNumberOfCpuRegisters);  // fake return address callee save
-    size_t frame_size = RoundUp((POPCOUNT(core_spills) /* gprs */ +
-                                 1 /* Method* */) * kX86PointerSize, kStackAlignment);
-    method->SetFrameSizeInBytes(frame_size);
-    method->SetCoreSpillMask(core_spills);
-    method->SetFpSpillMask(0);
-  } else if (instruction_set == kX86_64) {
-    uint32_t ref_spills =
-        (1 << art::x86_64::RBX) | (1 << art::x86_64::RBP) | (1 << art::x86_64::R12) |
-        (1 << art::x86_64::R13) | (1 << art::x86_64::R14) | (1 << art::x86_64::R15);
-    uint32_t arg_spills =
-        (1 << art::x86_64::RSI) | (1 << art::x86_64::RDX) | (1 << art::x86_64::RCX) |
-        (1 << art::x86_64::R8) | (1 << art::x86_64::R9);
-    uint32_t core_spills = ref_spills | (type == kRefsAndArgs ? arg_spills : 0) |
-        (1 << art::x86_64::kNumberOfCpuRegisters);  // fake return address callee save
-    uint32_t fp_arg_spills =
-        (1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) | (1 << art::x86_64::XMM2) |
-        (1 << art::x86_64::XMM3) | (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) |
-        (1 << art::x86_64::XMM6) | (1 << art::x86_64::XMM7);
-    uint32_t fp_spills = (type == kRefsAndArgs ? fp_arg_spills : 0);
-    size_t frame_size = RoundUp((POPCOUNT(core_spills) /* gprs */ +
-                                 POPCOUNT(fp_spills) /* fprs */ +
-                                 1 /* Method* */) * kX86_64PointerSize, kStackAlignment);
-    method->SetFrameSizeInBytes(frame_size);
-    method->SetCoreSpillMask(core_spills);
-    method->SetFpSpillMask(fp_spills);
-  } else if (instruction_set == kArm64) {
-      // Callee saved registers
-      uint32_t ref_spills = (1 << art::arm64::X19) | (1 << art::arm64::X20) | (1 << art::arm64::X21) |
-                            (1 << art::arm64::X22) | (1 << art::arm64::X23) | (1 << art::arm64::X24) |
-                            (1 << art::arm64::X25) | (1 << art::arm64::X26) | (1 << art::arm64::X27) |
-                            (1 << art::arm64::X28);
-      // X0 is the method pointer. Not saved.
-      uint32_t arg_spills = (1 << art::arm64::X1) | (1 << art::arm64::X2) | (1 << art::arm64::X3) |
-                            (1 << art::arm64::X4) | (1 << art::arm64::X5) | (1 << art::arm64::X6) |
-                            (1 << art::arm64::X7);
-      // TODO  This is conservative. Only ALL should include the thread register.
-      // The thread register is not preserved by the aapcs64.
-      // LR is always saved.
-      uint32_t all_spills =  0;  // (1 << art::arm64::LR);
-      uint32_t core_spills = ref_spills | (type == kRefsAndArgs ? arg_spills : 0) |
-                             (type == kSaveAll ? all_spills : 0) | (1 << art::arm64::FP)
-                             | (1 << art::arm64::X18) | (1 << art::arm64::LR);
-
-      // Save callee-saved floating point registers. Rest are scratch/parameters.
-      uint32_t fp_arg_spills = (1 << art::arm64::D0) | (1 << art::arm64::D1) | (1 << art::arm64::D2) |
-                            (1 << art::arm64::D3) | (1 << art::arm64::D4) | (1 << art::arm64::D5) |
-                            (1 << art::arm64::D6) | (1 << art::arm64::D7);
-      uint32_t fp_ref_spills = (1 << art::arm64::D8)  | (1 << art::arm64::D9)  | (1 << art::arm64::D10) |
-                               (1 << art::arm64::D11)  | (1 << art::arm64::D12)  | (1 << art::arm64::D13) |
-                               (1 << art::arm64::D14)  | (1 << art::arm64::D15);
-      uint32_t fp_all_spills = fp_arg_spills |
-                          (1 << art::arm64::D16)  | (1 << art::arm64::D17) | (1 << art::arm64::D18) |
-                          (1 << art::arm64::D19)  | (1 << art::arm64::D20) | (1 << art::arm64::D21) |
-                          (1 << art::arm64::D22)  | (1 << art::arm64::D23) | (1 << art::arm64::D24) |
-                          (1 << art::arm64::D25)  | (1 << art::arm64::D26) | (1 << art::arm64::D27) |
-                          (1 << art::arm64::D28)  | (1 << art::arm64::D29) | (1 << art::arm64::D30) |
-                          (1 << art::arm64::D31);
-      uint32_t fp_spills = fp_ref_spills | (type == kRefsAndArgs ? fp_arg_spills: 0)
-                          | (type == kSaveAll ? fp_all_spills : 0);
-      size_t frame_size = RoundUp((POPCOUNT(core_spills) /* gprs */ +
-                                   POPCOUNT(fp_spills) /* fprs */ +
-                                   1 /* Method* */) * kArm64PointerSize, kStackAlignment);
-      method->SetFrameSizeInBytes(frame_size);
-      method->SetCoreSpillMask(core_spills);
-      method->SetFpSpillMask(fp_spills);
-  } else {
-    UNIMPLEMENTED(FATAL) << instruction_set;
-  }
-  return method.get();
+  DCHECK_NE(instruction_set_, kNone);
+  return method.Get();
 }
 
 void Runtime::DisallowNewSystemWeaks() {
@@ -1120,6 +1030,38 @@
   Dbg::AllowNewObjectRegistryObjects();
 }
 
+void Runtime::SetInstructionSet(InstructionSet instruction_set) {
+  instruction_set_ = instruction_set;
+  if ((instruction_set_ == kThumb2) || (instruction_set_ == kArm)) {
+    for (int i = 0; i != kLastCalleeSaveType; ++i) {
+      CalleeSaveType type = static_cast<CalleeSaveType>(i);
+      callee_save_method_frame_infos_[i] = arm::ArmCalleeSaveMethodFrameInfo(type);
+    }
+  } else if (instruction_set_ == kMips) {
+    for (int i = 0; i != kLastCalleeSaveType; ++i) {
+      CalleeSaveType type = static_cast<CalleeSaveType>(i);
+      callee_save_method_frame_infos_[i] = mips::MipsCalleeSaveMethodFrameInfo(type);
+    }
+  } else if (instruction_set_ == kX86) {
+    for (int i = 0; i != kLastCalleeSaveType; ++i) {
+      CalleeSaveType type = static_cast<CalleeSaveType>(i);
+      callee_save_method_frame_infos_[i] = x86::X86CalleeSaveMethodFrameInfo(type);
+    }
+  } else if (instruction_set_ == kX86_64) {
+    for (int i = 0; i != kLastCalleeSaveType; ++i) {
+      CalleeSaveType type = static_cast<CalleeSaveType>(i);
+      callee_save_method_frame_infos_[i] = x86_64::X86_64CalleeSaveMethodFrameInfo(type);
+    }
+  } else if (instruction_set_ == kArm64) {
+    for (int i = 0; i != kLastCalleeSaveType; ++i) {
+      CalleeSaveType type = static_cast<CalleeSaveType>(i);
+      callee_save_method_frame_infos_[i] = arm64::Arm64CalleeSaveMethodFrameInfo(type);
+    }
+  } else {
+    UNIMPLEMENTED(FATAL) << instruction_set_;
+  }
+}
+
 void Runtime::SetCalleeSaveMethod(mirror::ArtMethod* method, CalleeSaveType type) {
   DCHECK_LT(static_cast<int>(type), static_cast<int>(kLastCalleeSaveType));
   callee_save_methods_[type] = method;
@@ -1271,17 +1213,9 @@
   // Make the dex2oat instruction set match that of the launching runtime. If we have multiple
   // architecture support, dex2oat may be compiled as a different instruction-set than that
   // currently being executed.
-#if defined(__arm__)
-  argv->push_back("--instruction-set=arm");
-#elif defined(__aarch64__)
-  argv->push_back("--instruction-set=arm64");
-#elif defined(__i386__)
-  argv->push_back("--instruction-set=x86");
-#elif defined(__x86_64__)
-  argv->push_back("--instruction-set=x86_64");
-#elif defined(__mips__)
-  argv->push_back("--instruction-set=mips");
-#endif
+  std::string instruction_set("--instruction-set=");
+  instruction_set += GetInstructionSetString(kRuntimeISA);
+  argv->push_back(instruction_set);
 
   std::string features("--instruction-set-features=");
   features += GetDefaultInstructionSetFeatures();
@@ -1289,6 +1223,6 @@
 }
 
 void Runtime::UpdateProfilerState(int state) {
-  LOG(DEBUG) << "Profiler state updated to " << state;
+  VLOG(profiler) << "Profiler state updated to " << state;
 }
 }  // namespace art
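
[editor's note] CreateCalleeSaveMethod no longer bakes per-architecture spill masks into each method; instead Runtime::SetInstructionSet precomputes a QuickMethodFrameInfo per CalleeSaveType and GetCalleeSaveMethodFrameInfo returns it by value. A hedged sketch of the resulting flow; the runtime pointer and ISA are placeholders:

    art::Runtime* runtime = art::Runtime::Current();
    runtime->SetInstructionSet(art::kThumb2);  // fills callee_save_method_frame_infos_
    art::QuickMethodFrameInfo info =
        runtime->GetCalleeSaveMethodFrameInfo(art::Runtime::kRefsAndArgs);
    uint32_t frame_size = info.FrameSizeInBytes();
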
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 1ee0b1a..f7074f6 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -34,6 +34,7 @@
 #include "instrumentation.h"
 #include "jobject_comparator.h"
 #include "object_callbacks.h"
+#include "quick/quick_method_frame_info.h"
 #include "runtime_stats.h"
 #include "safe_map.h"
 #include "fault_handler.h"
@@ -81,7 +82,7 @@
 
 class Runtime {
  public:
-  typedef std::vector<std::pair<std::string, const void*> > Options;
+  typedef std::vector<std::pair<std::string, const void*>> Options;
 
   // Creates and initializes a new runtime.
   static bool Create(const Options& options, bool ignore_unrecognized)
@@ -325,20 +326,25 @@
     return callee_save_methods_[type];
   }
 
+  QuickMethodFrameInfo GetCalleeSaveMethodFrameInfo(CalleeSaveType type) const {
+    return callee_save_method_frame_infos_[type];
+  }
+
+  QuickMethodFrameInfo GetRuntimeMethodFrameInfo(mirror::ArtMethod* method) const;
+
   static size_t GetCalleeSaveMethodOffset(CalleeSaveType type) {
     return OFFSETOF_MEMBER(Runtime, callee_save_methods_[type]);
   }
 
+  InstructionSet GetInstructionSet() const {
+    return instruction_set_;
+  }
+
+  void SetInstructionSet(InstructionSet instruction_set);
+
   void SetCalleeSaveMethod(mirror::ArtMethod* method, CalleeSaveType type);
 
-  mirror::ArtMethod* CreateCalleeSaveMethod(InstructionSet instruction_set,
-                                                 CalleeSaveType type)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  mirror::ArtMethod* CreateRefOnlyCalleeSaveMethod(InstructionSet instruction_set)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  mirror::ArtMethod* CreateRefAndArgsCalleeSaveMethod(InstructionSet instruction_set)
+  mirror::ArtMethod* CreateCalleeSaveMethod(CalleeSaveType type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   int32_t GetStat(int kind);
@@ -468,6 +474,9 @@
   mirror::ArtMethod* imt_conflict_method_;
   mirror::ObjectArray<mirror::ArtMethod>* default_imt_;
 
+  InstructionSet instruction_set_;
+  QuickMethodFrameInfo callee_save_method_frame_infos_[kLastCalleeSaveType];
+
   CompilerCallbacks* compiler_callbacks_;
   bool is_zygote_;
   bool is_concurrent_gc_enabled_;
@@ -514,7 +523,7 @@
   size_t threads_being_born_ GUARDED_BY(Locks::runtime_shutdown_lock_);
 
   // Waited upon until no threads are being born.
-  UniquePtr<ConditionVariable> shutdown_cond_ GUARDED_BY(Locks::runtime_shutdown_lock_);
+  std::unique_ptr<ConditionVariable> shutdown_cond_ GUARDED_BY(Locks::runtime_shutdown_lock_);
 
   // Set when runtime shutdown is past the point that new threads may attach.
   bool shutting_down_ GUARDED_BY(Locks::runtime_shutdown_lock_);
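
With the accessors added above, callers ask the runtime for callee-save frame layout instead of reading it from the saved ArtMethod. A hypothetical call site; the Runtime:: qualification of kRefsAndArgs is an assumption, while the accessor and the QuickMethodFrameInfo members are the ones this patch uses elsewhere.

// Hypothetical call site, assuming an initialized runtime.
Runtime* runtime = Runtime::Current();
QuickMethodFrameInfo info = runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
size_t frame_size = info.FrameSizeInBytes();
uint32_t core_spills = info.CoreSpillMask();
uint32_t fp_spills = info.FpSpillMask();
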
diff --git a/runtime/safe_map.h b/runtime/safe_map.h
index 393bf92..190db60 100644
--- a/runtime/safe_map.h
+++ b/runtime/safe_map.h
@@ -27,7 +27,7 @@
 // Equivalent to std::map, but without operator[] and its bug-prone semantics (in particular,
 // the implicit insertion of a default-constructed value on failed lookups).
 template <typename K, typename V, typename Comparator = std::less<K>,
-          typename Allocator = std::allocator<std::pair<const K, V> > >
+          typename Allocator = std::allocator<std::pair<const K, V>>>
 class SafeMap {
  private:
   typedef SafeMap<K, V, Comparator, Allocator> Self;
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index 7698d6a..d56495e 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -25,7 +25,7 @@
 
 // Scoped change into and out of a particular state. Handles Runnable transitions that require
 // more complicated suspension checking. The subclasses ScopedObjectAccessUnchecked and
 // ScopedObjectAccess are used to handle the change into Runnable to get direct access to objects,
 // the unchecked variant doesn't aid annotalysis.
 class ScopedThreadStateChange {
  public:
@@ -93,50 +93,15 @@
   ThreadState old_thread_state_;
   const bool expected_has_no_thread_;
 
+  friend class ScopedObjectAccessUnchecked;
   DISALLOW_COPY_AND_ASSIGN(ScopedThreadStateChange);
 };
 
-// Entry/exit processing for transitions from Native to Runnable (ie within JNI functions).
-//
-// This class performs the necessary thread state switching to and from Runnable and lets us
-// amortize the cost of working out the current thread. Additionally it lets us check (and repair)
-// apps that are using a JNIEnv on the wrong thread. The class also decodes and encodes Objects
-// into jobjects via methods of this class. Performing this here enforces the Runnable thread state
-// for use of Object, thereby inhibiting the Object being modified by GC whilst native or VM code
-// is also manipulating the Object.
-//
-// The destructor transitions back to the previous thread state, typically Native. In this state
-// GC and thread suspension may occur.
-//
-// For annotalysis the subclass ScopedObjectAccess (below) makes it explicit that a shared of
-// the mutator_lock_ will be acquired on construction.
-class ScopedObjectAccessUnchecked : public ScopedThreadStateChange {
+// Assumes we are already runnable.
+class ScopedObjectAccessAlreadyRunnable {
  public:
-  explicit ScopedObjectAccessUnchecked(JNIEnv* env)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) ALWAYS_INLINE
-      : ScopedThreadStateChange(ThreadForEnv(env), kRunnable),
-        env_(down_cast<JNIEnvExt*>(env)), vm_(env_->vm) {
-    self_->VerifyStack();
-    Locks::mutator_lock_->AssertSharedHeld(self_);
-  }
-
-  explicit ScopedObjectAccessUnchecked(Thread* self)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
-      : ScopedThreadStateChange(self, kRunnable),
-        env_(down_cast<JNIEnvExt*>(self->GetJniEnv())),
-        vm_(env_ != NULL ? env_->vm : NULL) {
-    self_->VerifyStack();
-    Locks::mutator_lock_->AssertSharedHeld(self_);
-  }
-
-  // Used when we want a scoped JNI thread state but have no thread/JNIEnv. Consequently doesn't
-  // change into Runnable or acquire a share on the mutator_lock_.
-  explicit ScopedObjectAccessUnchecked(JavaVM* vm)
-      : ScopedThreadStateChange(), env_(NULL), vm_(down_cast<JavaVMExt*>(vm)) {}
-
-  // Here purely to force inlining.
-  ~ScopedObjectAccessUnchecked() ALWAYS_INLINE {
-    Locks::mutator_lock_->AssertSharedHeld(self_);
+  Thread* Self() const {
+    return self_;
   }
 
   JNIEnvExt* Env() const {
@@ -159,13 +124,11 @@
   template<typename T>
   T AddLocalReference(mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     if (obj == NULL) {
       return NULL;
     }
-
     DCHECK_NE((reinterpret_cast<uintptr_t>(obj) & 0xffff0000), 0xebad0000);
-
     return Env()->AddLocalReference<T>(obj);
   }
 
@@ -173,14 +136,14 @@
   T Decode(jobject obj) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     return down_cast<T>(Self()->DecodeJObject(obj));
   }
 
   mirror::ArtField* DecodeField(jfieldID fid) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     CHECK(!kMovingFields);
     return reinterpret_cast<mirror::ArtField*>(fid);
   }
@@ -188,7 +151,7 @@
   jfieldID EncodeField(mirror::ArtField* field) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     CHECK(!kMovingFields);
     return reinterpret_cast<jfieldID>(field);
   }
@@ -196,7 +159,7 @@
   mirror::ArtMethod* DecodeMethod(jmethodID mid) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     CHECK(!kMovingMethods);
     return reinterpret_cast<mirror::ArtMethod*>(mid);
   }
@@ -204,16 +167,83 @@
   jmethodID EncodeMethod(mirror::ArtMethod* method) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     CHECK(!kMovingMethods);
     return reinterpret_cast<jmethodID>(method);
   }
 
- private:
+  bool IsRunnable() const {
+    return self_->GetState() == kRunnable;
+  }
+
+ protected:
+  explicit ScopedObjectAccessAlreadyRunnable(JNIEnv* env)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) ALWAYS_INLINE
+      : self_(ThreadForEnv(env)), env_(down_cast<JNIEnvExt*>(env)), vm_(env_->vm) {
+  }
+
+  explicit ScopedObjectAccessAlreadyRunnable(Thread* self)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) ALWAYS_INLINE
+      : self_(self), env_(down_cast<JNIEnvExt*>(self->GetJniEnv())),
+        vm_(env_ != nullptr ? env_->vm : nullptr) {
+  }
+
+  // Used when we want a scoped JNI thread state but have no thread/JNIEnv. Consequently doesn't
+  // change into Runnable or acquire a share on the mutator_lock_.
+  explicit ScopedObjectAccessAlreadyRunnable(JavaVM* vm)
+      : self_(nullptr), env_(nullptr), vm_(down_cast<JavaVMExt*>(vm)) {}
+
+  // Here purely to force inlining.
+  ~ScopedObjectAccessAlreadyRunnable() ALWAYS_INLINE {
+  }
+
+  // Self thread, can be null.
+  Thread* const self_;
   // The full JNIEnv.
   JNIEnvExt* const env_;
   // The full JavaVM.
   JavaVMExt* const vm_;
+};
+
+// Entry/exit processing for transitions from Native to Runnable (ie within JNI functions).
+//
+// This class performs the necessary thread state switching to and from Runnable and lets us
+// amortize the cost of working out the current thread. Additionally it lets us check (and repair)
+// apps that are using a JNIEnv on the wrong thread. The class also decodes and encodes Objects
+// into jobjects via methods of this class. Performing this here enforces the Runnable thread state
+// for use of Object, thereby inhibiting the Object being modified by GC whilst native or VM code
+// is also manipulating the Object.
+//
+// The destructor transitions back to the previous thread state, typically Native. In this state
+// GC and thread suspension may occur.
+//
+// For annotalysis the subclass ScopedObjectAccess (below) makes it explicit that a share of
+// the mutator_lock_ will be acquired on construction.
+class ScopedObjectAccessUnchecked : public ScopedObjectAccessAlreadyRunnable {
+ public:
+  explicit ScopedObjectAccessUnchecked(JNIEnv* env)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) ALWAYS_INLINE
+      : ScopedObjectAccessAlreadyRunnable(env), tsc_(Self(), kRunnable) {
+    Self()->VerifyStack();
+    Locks::mutator_lock_->AssertSharedHeld(Self());
+  }
+
+  explicit ScopedObjectAccessUnchecked(Thread* self)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) ALWAYS_INLINE
+      : ScopedObjectAccessAlreadyRunnable(self), tsc_(self, kRunnable) {
+    Self()->VerifyStack();
+    Locks::mutator_lock_->AssertSharedHeld(Self());
+  }
+
+  // Used when we want a scoped JNI thread state but have no thread/JNIEnv. Consequently doesn't
+  // change into Runnable or acquire a share on the mutator_lock_.
+  explicit ScopedObjectAccessUnchecked(JavaVM* vm) ALWAYS_INLINE
+      : ScopedObjectAccessAlreadyRunnable(vm), tsc_() {}
+
+ private:
+  // The scoped thread state change makes sure that we are runnable and restores the thread state
+  // in the destructor.
+  const ScopedThreadStateChange tsc_;
 
   DISALLOW_COPY_AND_ASSIGN(ScopedObjectAccessUnchecked);
 };
@@ -229,7 +259,7 @@
 
   explicit ScopedObjectAccess(Thread* self)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
-      SHARED_LOCK_FUNCTION(Locks::mutator_lock_)
+      SHARED_LOCK_FUNCTION(Locks::mutator_lock_) ALWAYS_INLINE
       : ScopedObjectAccessUnchecked(self) {
   }
 
diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc
index 611c0a8..c13776d 100644
--- a/runtime/signal_catcher.cc
+++ b/runtime/signal_catcher.cc
@@ -106,7 +106,7 @@
     PLOG(ERROR) << "Unable to open stack trace file '" << stack_trace_file_ << "'";
     return;
   }
-  UniquePtr<File> file(new File(fd, stack_trace_file_));
+  std::unique_ptr<File> file(new File(fd, stack_trace_file_));
   if (!file->WriteFully(s.data(), s.size())) {
     PLOG(ERROR) << "Failed to write stack traces to '" << stack_trace_file_ << "'";
   } else {
diff --git a/runtime/sirt_ref-inl.h b/runtime/sirt_ref-inl.h
deleted file mode 100644
index 7de624a..0000000
--- a/runtime/sirt_ref-inl.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_SIRT_REF_INL_H_
-#define ART_RUNTIME_SIRT_REF_INL_H_
-
-#include "sirt_ref.h"
-
-#include "verify_object-inl.h"
-
-namespace art {
-
-template<class T> inline SirtRef<T>::SirtRef(Thread* self, T* object, bool should_verify)
-  : self_(self), sirt_(object) {
-  if (should_verify) {
-    VerifyObject(object);
-  }
-  self_->PushSirt(&sirt_);
-}
-
-template<class T> inline SirtRef<T>::~SirtRef() {
-  StackIndirectReferenceTable* top_sirt = self_->PopSirt();
-  DCHECK_EQ(top_sirt, &sirt_);
-}
-
-template<class T> inline T* SirtRef<T>::reset(T* object, bool should_verify) {
-  if (should_verify) {
-    VerifyObject(object);
-  }
-  T* old_ref = get();
-  sirt_.SetReference(0, object);
-  return old_ref;
-}
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_SIRT_REF_INL_H_
diff --git a/runtime/sirt_ref.h b/runtime/sirt_ref.h
deleted file mode 100644
index cf23891..0000000
--- a/runtime/sirt_ref.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_SIRT_REF_H_
-#define ART_RUNTIME_SIRT_REF_H_
-
-#include "base/casts.h"
-#include "base/logging.h"
-#include "base/macros.h"
-#include "stack_indirect_reference_table.h"
-#include "thread.h"
-
-namespace art {
-
-template<class T>
-class SirtRef {
- public:
-  SirtRef(Thread* self, T* object, bool should_verify = true);
-  ~SirtRef();
-
-  T& operator*() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return *get();
-  }
-  T* operator->() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return get();
-  }
-  T* get() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return down_cast<T*>(sirt_.GetReference(0));
-  }
-
-  // Returns the old reference.
-  T* reset(T* object = nullptr, bool should_verify = true)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
- private:
-  Thread* const self_;
-  StackIndirectReferenceTable sirt_;
-
-  DISALLOW_COPY_AND_ASSIGN(SirtRef);
-};
-
-// A version of SirtRef which disables the object verification.
-template<class T>
-class SirtRefNoVerify : public SirtRef<T> {
- public:
-  SirtRefNoVerify(Thread* self, T* object) : SirtRef<T>(self, object, false) {}
-  // Returns the old reference.
-  T* reset(T* object = nullptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return SirtRef<T>::reset(object, false);
-  }
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_SIRT_REF_H_
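
The two deleted headers remove SirtRef, the single-slot stack indirect reference holder; its call sites are migrated to StackHandleScope<N> and Handle<T> in the thread.cc hunks further down. The before/after fragments below are condensed from those hunks and are not standalone code; the types are the ART ones referenced in this patch.

// Before: one SirtRef per protected reference; get()/reset() accessors.
SirtRef<mirror::String> peer_thread_name(soa.Self(), GetThreadName(soa));
if (peer_thread_name.get() == nullptr) {
  // ... create the name ...
  peer_thread_name.reset(GetThreadName(soa));
}
SetThreadName(peer_thread_name->ToModifiedUtf8().c_str());

// After: one StackHandleScope sized for all handles needed in the scope;
// handles come from NewHandle() and use Get()/Assign() instead.
StackHandleScope<1> hs(self);
Handle<mirror::String> peer_thread_name(hs.NewHandle(GetThreadName(soa)));
if (peer_thread_name.Get() == nullptr) {
  // ... create the name ...
  peer_thread_name.Assign(GetThreadName(soa));
}
SetThreadName(peer_thread_name->ToModifiedUtf8().c_str());
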
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 9c709ae..be1fba4 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -23,6 +23,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "object_utils.h"
+#include "quick/quick_method_frame_info.h"
 #include "runtime.h"
 #include "thread.h"
 #include "thread_list.h"
@@ -32,14 +33,6 @@
 
 namespace art {
 
-// Define a piece of memory, the address of which can be used as a marker
-// for the gap in the stack added during stack overflow handling.
-static uint32_t stack_overflow_object;
-
-// The stack overflow gap marker is simply a valid unique address.
-void* stack_overflow_gap_marker = &stack_overflow_object;
-
-
 mirror::Object* ShadowFrame::GetThisObject() const {
   mirror::ArtMethod* m = GetMethod();
   if (m->IsStatic()) {
@@ -118,11 +111,9 @@
     return NULL;
   } else if (m->IsNative()) {
     if (cur_quick_frame_ != NULL) {
-      StackIndirectReferenceTable* sirt =
-          reinterpret_cast<StackIndirectReferenceTable*>(
-              reinterpret_cast<char*>(cur_quick_frame_) +
-              m->GetSirtOffsetInBytes());
-      return sirt->GetReference(0);
+      HandleScope* hs = reinterpret_cast<HandleScope*>(
+          reinterpret_cast<char*>(cur_quick_frame_) + m->GetHandleScopeOffsetInBytes());
+      return hs->GetReference(0);
     } else {
       return cur_shadow_frame_->GetVRegReference(0);
     }
@@ -148,20 +139,21 @@
   if (cur_quick_frame_ != NULL) {
     DCHECK(context_ != NULL);  // You can't reliably read registers without a context.
     DCHECK(m == GetMethod());
-    const VmapTable vmap_table(m->GetVmapTable());
+    const void* code_pointer = m->GetQuickOatCodePointer();
+    DCHECK(code_pointer != nullptr);
+    const VmapTable vmap_table(m->GetVmapTable(code_pointer));
+    QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
     uint32_t vmap_offset;
     // TODO: IsInContext stops before spotting floating point registers.
     if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
       bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
-      uint32_t spill_mask = is_float ? m->GetFpSpillMask()
-                                     : m->GetCoreSpillMask();
+      uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
       return GetGPR(vmap_table.ComputeRegister(spill_mask, vmap_offset, kind));
     } else {
       const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
       DCHECK(code_item != NULL) << PrettyMethod(m);  // Can't be NULL or how would we compile its instructions?
-      size_t frame_size = m->GetFrameSizeInBytes();
-      return *GetVRegAddr(cur_quick_frame_, code_item, m->GetCoreSpillMask(), m->GetFpSpillMask(),
-                          frame_size, vreg);
+      return *GetVRegAddr(cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
+                          frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
     }
   } else {
     return cur_shadow_frame_->GetVReg(vreg);
@@ -173,21 +165,22 @@
   if (cur_quick_frame_ != NULL) {
     DCHECK(context_ != NULL);  // You can't reliably write registers without a context.
     DCHECK(m == GetMethod());
-    const VmapTable vmap_table(m->GetVmapTable());
+    const void* code_pointer = m->GetQuickOatCodePointer();
+    DCHECK(code_pointer != nullptr);
+    const VmapTable vmap_table(m->GetVmapTable(code_pointer));
+    QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
     uint32_t vmap_offset;
     // TODO: IsInContext stops before spotting floating point registers.
     if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
       bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
-      uint32_t spill_mask = is_float ? m->GetFpSpillMask() : m->GetCoreSpillMask();
+      uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
       const uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kReferenceVReg);
       SetGPR(reg, new_value);
     } else {
       const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
       DCHECK(code_item != NULL) << PrettyMethod(m);  // Can't be NULL or how would we compile its instructions?
-      uint32_t core_spills = m->GetCoreSpillMask();
-      uint32_t fp_spills = m->GetFpSpillMask();
-      size_t frame_size = m->GetFrameSizeInBytes();
-      int offset = GetVRegOffset(code_item, core_spills, fp_spills, frame_size, vreg, kRuntimeISA);
+      int offset = GetVRegOffset(code_item, frame_info.CoreSpillMask(), frame_info.FpSpillMask(),
+                                 frame_info.FrameSizeInBytes(), vreg, kRuntimeISA);
       byte* vreg_addr = reinterpret_cast<byte*>(GetCurrentQuickFrame()) + offset;
       *reinterpret_cast<uint32_t*>(vreg_addr) = new_value;
     }
@@ -278,7 +271,7 @@
 void StackVisitor::SanityCheckFrame() const {
   if (kIsDebugBuild) {
     mirror::ArtMethod* method = GetMethod();
-    CHECK(method->GetClass() == mirror::ArtMethod::GetJavaLangReflectArtMethod());
+    CHECK_EQ(method->GetClass(), mirror::ArtMethod::GetJavaLangReflectArtMethod());
     if (cur_quick_frame_ != nullptr) {
       method->AssertPcIsWithinQuickCode(cur_quick_frame_pc_);
       // Frame sanity.
@@ -286,7 +279,7 @@
       CHECK_NE(frame_size, 0u);
       // A rough guess at an upper size we expect to see for a frame.
       // 256 registers
-      // 2 words Sirt overhead
+      // 2 words HandleScope overhead
       // 3+3 register spills
       // TODO: this seems architecture specific for the case of JNI frames.
       // TODO: 083-compiler-regressions ManyFloatArgs shows this estimate is wrong.
@@ -305,67 +298,31 @@
   bool exit_stubs_installed = Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled();
   uint32_t instrumentation_stack_depth = 0;
 
-  bool kDebugStackWalk = false;
-  bool kDebugStackWalkVeryVerbose = false;            // The name says it all.
-
-  if (kDebugStackWalk) {
-    LOG(INFO) << "walking stack";
-  }
   for (const ManagedStack* current_fragment = thread_->GetManagedStack(); current_fragment != NULL;
        current_fragment = current_fragment->GetLink()) {
     cur_shadow_frame_ = current_fragment->GetTopShadowFrame();
     cur_quick_frame_ = current_fragment->GetTopQuickFrame();
     cur_quick_frame_pc_ = current_fragment->GetTopQuickFramePc();
-    if (kDebugStackWalkVeryVerbose) {
-      LOG(INFO) << "cur_quick_frame: " << cur_quick_frame_;
-      LOG(INFO) << "cur_quick_frame_pc: " << std::hex << cur_quick_frame_pc_;
-    }
 
     if (cur_quick_frame_ != NULL) {  // Handle quick stack frames.
       // Can't be both a shadow and a quick fragment.
       DCHECK(current_fragment->GetTopShadowFrame() == NULL);
       mirror::ArtMethod* method = *cur_quick_frame_;
       while (method != NULL) {
-        // Check for a stack overflow gap marker.
-        if (method == reinterpret_cast<mirror::ArtMethod*>(stack_overflow_gap_marker)) {
-          // Marker for a stack overflow.  This is followed by the offset from the
-          // current SP to the next frame.  There is a gap in the stack here.  Jump
-          // the gap silently.
-          // Caveat coder: the layout of the overflow marker depends on the architecture.
-          //   The first element is address sized (8 bytes on a 64 bit machine).  The second
-          //   element is 32 bits.  So be careful with those address calculations.
-
-          // Get the address of the offset, just beyond the marker pointer.
-          byte* gapsizeaddr = reinterpret_cast<byte*>(cur_quick_frame_) + sizeof(uintptr_t);
-          uint32_t gap = *reinterpret_cast<uint32_t*>(gapsizeaddr);
-          CHECK_GT(gap, Thread::kStackOverflowProtectedSize);
-          mirror::ArtMethod** next_frame = reinterpret_cast<mirror::ArtMethod**>(
-            reinterpret_cast<byte*>(gapsizeaddr) + gap);
-          if (kDebugStackWalk) {
-            LOG(INFO) << "stack overflow marker hit, gap: " << gap << ", next_frame: " <<
-                next_frame;
-          }
-          cur_quick_frame_ = next_frame;
-          method = *next_frame;
-          CHECK(method != nullptr);
-        } else {
-          SanityCheckFrame();
-          bool should_continue = VisitFrame();
-          if (UNLIKELY(!should_continue)) {
-            return;
-          }
+        SanityCheckFrame();
+        bool should_continue = VisitFrame();
+        if (UNLIKELY(!should_continue)) {
+          return;
         }
+
         if (context_ != NULL) {
           context_->FillCalleeSaves(*this);
         }
         size_t frame_size = method->GetFrameSizeInBytes();
         // Compute PC for next stack frame from return PC.
-        size_t return_pc_offset = method->GetReturnPcOffsetInBytes();
+        size_t return_pc_offset = method->GetReturnPcOffsetInBytes(frame_size);
         byte* return_pc_addr = reinterpret_cast<byte*>(cur_quick_frame_) + return_pc_offset;
         uintptr_t return_pc = *reinterpret_cast<uintptr_t*>(return_pc_addr);
-        if (kDebugStackWalkVeryVerbose) {
-          LOG(INFO) << "frame size: " << frame_size << ", return_pc: " << std::hex << return_pc;
-        }
         if (UNLIKELY(exit_stubs_installed)) {
           // While profiling, the return pc is restored from the side stack, except when walking
           // the stack for an exception where the side stack will be unwound in VisitFrame.
@@ -398,10 +355,6 @@
         cur_quick_frame_ = reinterpret_cast<mirror::ArtMethod**>(next_frame);
         cur_depth_++;
         method = *cur_quick_frame_;
-        if (kDebugStackWalkVeryVerbose) {
-          LOG(INFO) << "new cur_quick_frame_: " << cur_quick_frame_;
-          LOG(INFO) << "new cur_quick_frame_pc_: " << std::hex << cur_quick_frame_pc_;
-        }
       }
     } else if (cur_shadow_frame_ != NULL) {
       do {
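
In the stack.cc hunks above, the walker no longer reads frame size and spill masks from the ArtMethod; it decodes a QuickMethodFrameInfo from the method's quick code pointer and picks the core or FP spill mask depending on the kind of vreg. The sketch below shows the underlying idea only; the header layout is an assumption for illustration, not the actual oat format.

#include <cstdint>

struct FrameInfo {
  uint32_t frame_size_in_bytes;
  uint32_t core_spill_mask;
  uint32_t fp_spill_mask;
};

struct CodeHeader {
  FrameInfo frame_info;  // assumed to be emitted immediately before the code
};

// Locate the metadata from the code pointer, so nothing is stored per method.
inline const CodeHeader* HeaderFromCodePointer(const void* code_pointer) {
  return reinterpret_cast<const CodeHeader*>(code_pointer) - 1;
}

inline FrameInfo GetFrameInfo(const void* code_pointer) {
  return HeaderFromCodePointer(code_pointer)->frame_info;
}
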
diff --git a/runtime/stack.h b/runtime/stack.h
index 73a823a..2e32f51 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -40,7 +40,7 @@
 
 class Context;
 class ShadowFrame;
-class StackIndirectReferenceTable;
+class HandleScope;
 class ScopedObjectAccess;
 class Thread;
 
@@ -102,14 +102,6 @@
   kVRegNonSpecialTempBaseReg = -3,
 };
 
-// Special object used to mark the gap in the stack placed when a stack
-// overflow fault occurs during implicit stack checking.  This is not
-// a real object - it is used simply as a valid address to which a
-// mirror::ArtMethod* can be compared during a stack walk.  It is inserted
-// into the stack during the stack overflow signal handling to mark the gap
-// in which the memory is protected against read and write.
-extern void* stack_overflow_gap_marker;
-
 // A reference from the shadow stack to a MirrorType object within the Java heap.
 template<class MirrorType>
 class MANAGED StackReference : public mirror::ObjectReference<false, MirrorType> {
@@ -314,6 +306,11 @@
     return method_;
   }
 
+  mirror::ArtMethod** GetMethodAddress() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(method_ != nullptr);
+    return &method_;
+  }
+
   mirror::Object* GetThisObject() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::Object* GetThisObject(uint16_t num_ins) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -397,12 +394,7 @@
 #endif
   // Link to previous shadow frame or NULL.
   ShadowFrame* link_;
-#if defined(ART_USE_PORTABLE_COMPILER)
-  // TODO: make const in the portable case.
   mirror::ArtMethod* method_;
-#else
-  mirror::ArtMethod* const method_;
-#endif
   uint32_t dex_pc_;
   uint32_t vregs_[0];
 
@@ -526,6 +518,16 @@
     }
   }
 
+  mirror::ArtMethod** GetMethodAddress() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (cur_shadow_frame_ != nullptr) {
+      return cur_shadow_frame_->GetMethodAddress();
+    } else if (cur_quick_frame_ != nullptr) {
+      return cur_quick_frame_;
+    } else {
+      return nullptr;
+    }
+  }
+
   bool IsShadowFrame() const {
     return cur_shadow_frame_ != nullptr;
   }
@@ -685,10 +687,10 @@
     return cur_shadow_frame_;
   }
 
-  StackIndirectReferenceTable* GetCurrentSirt() const {
+  HandleScope* GetCurrentHandleScope() const {
     mirror::ArtMethod** sp = GetCurrentQuickFrame();
-    ++sp;  // Skip Method*; SIRT comes next;
-    return reinterpret_cast<StackIndirectReferenceTable*>(sp);
+    ++sp;  // Skip Method*; handle scope comes next;
+    return reinterpret_cast<HandleScope*>(sp);
   }
 
   std::string DescribeLocation() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/stack_indirect_reference_table.h b/runtime/stack_indirect_reference_table.h
deleted file mode 100644
index 3b632e7..0000000
--- a/runtime/stack_indirect_reference_table.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_STACK_INDIRECT_REFERENCE_TABLE_H_
-#define ART_RUNTIME_STACK_INDIRECT_REFERENCE_TABLE_H_
-
-#include "base/logging.h"
-#include "base/macros.h"
-#include "stack.h"
-#include "utils.h"
-
-namespace art {
-namespace mirror {
-class Object;
-}
-class Thread;
-
-// Stack allocated indirect reference table. It can allocated within
-// the bridge frame between managed and native code backed by stack
-// storage or manually allocated by SirtRef to hold one reference.
-class StackIndirectReferenceTable {
- public:
-  explicit StackIndirectReferenceTable(mirror::Object* object) :
-      link_(NULL), number_of_references_(1) {
-    references_[0].Assign(object);
-  }
-
-  ~StackIndirectReferenceTable() {}
-
-  // Number of references contained within this SIRT.
-  uint32_t NumberOfReferences() const {
-    return number_of_references_;
-  }
-
-  // We have versions with and without explicit pointer size of the following. The first two are
-  // used at runtime, so OFFSETOF_MEMBER computes the right offsets automatically. The last one
-  // takes the pointer size explicitly so that at compile time we can cross-compile correctly.
-
-  // Returns the size of a StackIndirectReferenceTable containing num_references sirts.
-  static size_t SizeOf(uint32_t num_references) {
-    size_t header_size = OFFSETOF_MEMBER(StackIndirectReferenceTable, references_);
-    size_t data_size = sizeof(StackReference<mirror::Object>) * num_references;
-    return header_size + data_size;
-  }
-
-  // Get the size of the SIRT for the number of entries, with padding added for potential alignment.
-  static size_t GetAlignedSirtSize(uint32_t num_references) {
-    size_t sirt_size = SizeOf(num_references);
-    return RoundUp(sirt_size, 8);
-  }
-
-  // Get the size of the SIRT for the number of entries, with padding added for potential alignment.
-  static size_t GetAlignedSirtSizeTarget(size_t pointer_size, uint32_t num_references) {
-    // Assume that the layout is packed.
-    size_t header_size = pointer_size + sizeof(number_of_references_);
-    // This assumes there is no layout change between 32 and 64b.
-    size_t data_size = sizeof(StackReference<mirror::Object>) * num_references;
-    size_t sirt_size = header_size + data_size;
-    return RoundUp(sirt_size, 8);
-  }
-
-  // Link to previous SIRT or NULL.
-  StackIndirectReferenceTable* GetLink() const {
-    return link_;
-  }
-
-  void SetLink(StackIndirectReferenceTable* sirt) {
-    DCHECK_NE(this, sirt);
-    link_ = sirt;
-  }
-
-  // Sets the number_of_references_ field for constructing tables out of raw memory. Warning: will
-  // not resize anything.
-  void SetNumberOfReferences(uint32_t num_references) {
-    number_of_references_ = num_references;
-  }
-
-  mirror::Object* GetReference(size_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK_LT(i, number_of_references_);
-    return references_[i].AsMirrorPtr();
-  }
-
-  StackReference<mirror::Object>* GetStackReference(size_t i)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK_LT(i, number_of_references_);
-    return &references_[i];
-  }
-
-  void SetReference(size_t i, mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK_LT(i, number_of_references_);
-    references_[i].Assign(object);
-  }
-
-  bool Contains(StackReference<mirror::Object>* sirt_entry) const {
-    // A SIRT should always contain something. One created by the
-    // jni_compiler should have a jobject/jclass as a native method is
-    // passed in a this pointer or a class
-    DCHECK_GT(number_of_references_, 0U);
-    return ((&references_[0] <= sirt_entry)
-            && (sirt_entry <= (&references_[number_of_references_ - 1])));
-  }
-
-  // Offset of link within SIRT, used by generated code
-  static size_t LinkOffset(size_t pointer_size) {
-    return 0;
-  }
-
-  // Offset of length within SIRT, used by generated code
-  static size_t NumberOfReferencesOffset(size_t pointer_size) {
-    return pointer_size;
-  }
-
-  // Offset of link within SIRT, used by generated code
-  static size_t ReferencesOffset(size_t pointer_size) {
-    return pointer_size + sizeof(number_of_references_);
-  }
-
- private:
-  StackIndirectReferenceTable() {}
-
-  StackIndirectReferenceTable* link_;
-  uint32_t number_of_references_;
-
-  // number_of_references_ are available if this is allocated and filled in by jni_compiler.
-  StackReference<mirror::Object> references_[1];
-
-  DISALLOW_COPY_AND_ASSIGN(StackIndirectReferenceTable);
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_STACK_INDIRECT_REFERENCE_TABLE_H_
diff --git a/runtime/thread.cc b/runtime/thread.cc
index e67a64f..41cfc58 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -44,6 +44,8 @@
 #include "gc/accounting/card_table-inl.h"
 #include "gc/heap.h"
 #include "gc/space/space.h"
+#include "handle_scope.h"
+#include "indirect_reference_table-inl.h"
 #include "jni_internal.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
@@ -54,14 +56,14 @@
 #include "monitor.h"
 #include "object_utils.h"
 #include "quick_exception_handler.h"
+#include "quick/quick_method_frame_info.h"
 #include "reflection.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 #include "stack.h"
-#include "stack_indirect_reference_table.h"
 #include "thread-inl.h"
 #include "thread_list.h"
 #include "utils.h"
@@ -155,11 +157,7 @@
     self->tlsPtr_.opeer = soa.Decode<mirror::Object*>(self->tlsPtr_.jpeer);
     self->GetJniEnv()->DeleteGlobalRef(self->tlsPtr_.jpeer);
     self->tlsPtr_.jpeer = nullptr;
-
-    {
-      SirtRef<mirror::String> thread_name(self, self->GetThreadName(soa));
-      self->SetThreadName(thread_name->ToModifiedUtf8().c_str());
-    }
+    self->SetThreadName(self->GetThreadName(soa)->ToModifiedUtf8().c_str());
     Dbg::PostThreadStart(self);
 
     // Invoke the 'run' method of our java.lang.Thread.
@@ -173,7 +171,7 @@
   return nullptr;
 }
 
-Thread* Thread::FromManagedThread(const ScopedObjectAccessUnchecked& soa,
+Thread* Thread::FromManagedThread(const ScopedObjectAccessAlreadyRunnable& soa,
                                   mirror::Object* thread_peer) {
   mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer);
   Thread* result = reinterpret_cast<Thread*>(static_cast<uintptr_t>(f->GetLong(thread_peer)));
@@ -188,7 +186,8 @@
   return result;
 }
 
-Thread* Thread::FromManagedThread(const ScopedObjectAccessUnchecked& soa, jobject java_thread) {
+Thread* Thread::FromManagedThread(const ScopedObjectAccessAlreadyRunnable& soa,
+                                  jobject java_thread) {
   return FromManagedThread(soa, soa.Decode<mirror::Object*>(java_thread));
 }
 
@@ -243,10 +242,16 @@
   pregion -= kStackOverflowProtectedSize;
 
   // Touch the pages in the region to map them in.  Otherwise mprotect fails.  Only
-  // need to do this on the main stack.
+  // need to do this on the main stack.  We only need to touch one byte per page.
   if (is_main_stack) {
-    memset(pregion, 0x55, kStackOverflowProtectedSize);
+    byte* start = pregion;
+    byte* end = pregion + kStackOverflowProtectedSize;
+    while (start < end) {
+      *start = static_cast<byte>(0);
+      start += kPageSize;
+    }
   }
+
   VLOG(threads) << "installing stack protected region at " << std::hex <<
       static_cast<void*>(pregion) << " to " <<
       static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
@@ -255,6 +260,11 @@
     LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. Reason:"
         << strerror(errno);
   }
+
+  // Tell the kernel that we won't be needing these pages any more.
+  if (is_main_stack) {
+    madvise(pregion, kStackOverflowProtectedSize, MADV_DONTNEED);
+  }
 }
 
 void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {
@@ -418,8 +428,9 @@
                     reinterpret_cast<jlong>(self));
 
   ScopedObjectAccess soa(self);
-  SirtRef<mirror::String> peer_thread_name(soa.Self(), GetThreadName(soa));
-  if (peer_thread_name.get() == nullptr) {
+  StackHandleScope<1> hs(self);
+  Handle<mirror::String> peer_thread_name(hs.NewHandle(GetThreadName(soa)));
+  if (peer_thread_name.Get() == nullptr) {
     // The Thread constructor should have set the Thread.name to a
     // non-null value. However, because we can run without code
     // available (in the compiler, in tests), we manually assign the
@@ -429,10 +440,10 @@
     } else {
       InitPeer<false>(soa, thread_is_daemon, thread_group, thread_name.get(), thread_priority);
     }
-    peer_thread_name.reset(GetThreadName(soa));
+    peer_thread_name.Assign(GetThreadName(soa));
   }
   // 'thread_name' may have been null, so don't trust 'peer_thread_name' to be non-null.
-  if (peer_thread_name.get() != nullptr) {
+  if (peer_thread_name.Get() != nullptr) {
     SetThreadName(peer_thread_name->ToModifiedUtf8().c_str());
   }
 }
@@ -546,7 +557,7 @@
   DumpStack(os);
 }
 
-mirror::String* Thread::GetThreadName(const ScopedObjectAccessUnchecked& soa) const {
+mirror::String* Thread::GetThreadName(const ScopedObjectAccessAlreadyRunnable& soa) const {
   mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
   return (tlsPtr_.opeer != nullptr) ? reinterpret_cast<mirror::String*>(f->GetObject(tlsPtr_.opeer)) : nullptr;
 }
@@ -922,7 +933,7 @@
 }
 
 void Thread::DumpJavaStack(std::ostream& os) const {
-  UniquePtr<Context> context(Context::Create());
+  std::unique_ptr<Context> context(Context::Create());
   StackDumpVisitor dumper(os, const_cast<Thread*>(this), context.get(),
                           !tls32_.throwing_OutOfMemoryError);
   dumper.WalkStack();
@@ -938,8 +949,7 @@
     // If we're currently in native code, dump that stack before dumping the managed stack.
     if (dump_for_abort || ShouldShowNativeStack(this)) {
       DumpKernelStack(os, GetTid(), "  kernel: ", false);
-      SirtRef<mirror::ArtMethod> method_ref(Thread::Current(), GetCurrentMethod(nullptr));
-      DumpNativeStack(os, GetTid(), "  native: ", method_ref.get());
+      DumpNativeStack(os, GetTid(), "  native: ", GetCurrentMethod(nullptr));
     }
     DumpJavaStack(os);
   } else {
@@ -1094,8 +1104,9 @@
         soa.DecodeField(WellKnownClasses::java_lang_Thread_lock)->GetObject(tlsPtr_.opeer);
     // (This conditional is only needed for tests, where Thread.lock won't have been set.)
     if (lock != nullptr) {
-      SirtRef<mirror::Object> sirt_obj(self, lock);
-      ObjectLock<mirror::Object> locker(self, &sirt_obj);
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Object> h_obj(hs.NewHandle(lock));
+      ObjectLock<mirror::Object> locker(self, h_obj);
       locker.NotifyAll();
     }
   }
@@ -1195,28 +1206,28 @@
   }
 }
 
-size_t Thread::NumSirtReferences() {
+size_t Thread::NumHandleReferences() {
   size_t count = 0;
-  for (StackIndirectReferenceTable* cur = tlsPtr_.top_sirt; cur; cur = cur->GetLink()) {
+  for (HandleScope* cur = tlsPtr_.top_handle_scope; cur; cur = cur->GetLink()) {
     count += cur->NumberOfReferences();
   }
   return count;
 }
 
-bool Thread::SirtContains(jobject obj) const {
-  StackReference<mirror::Object>* sirt_entry =
+bool Thread::HandleScopeContains(jobject obj) const {
+  StackReference<mirror::Object>* hs_entry =
       reinterpret_cast<StackReference<mirror::Object>*>(obj);
-  for (StackIndirectReferenceTable* cur = tlsPtr_.top_sirt; cur; cur = cur->GetLink()) {
-    if (cur->Contains(sirt_entry)) {
+  for (HandleScope* cur = tlsPtr_.top_handle_scope; cur; cur = cur->GetLink()) {
+    if (cur->Contains(hs_entry)) {
       return true;
     }
   }
-  // JNI code invoked from portable code uses shadow frames rather than the SIRT.
-  return tlsPtr_.managed_stack.ShadowFramesContain(sirt_entry);
+  // JNI code invoked from portable code uses shadow frames rather than the handle scope.
+  return tlsPtr_.managed_stack.ShadowFramesContain(hs_entry);
 }
 
-void Thread::SirtVisitRoots(RootCallback* visitor, void* arg, uint32_t thread_id) {
-  for (StackIndirectReferenceTable* cur = tlsPtr_.top_sirt; cur; cur = cur->GetLink()) {
+void Thread::HandleScopeVisitRoots(RootCallback* visitor, void* arg, uint32_t thread_id) {
+  for (HandleScope* cur = tlsPtr_.top_handle_scope; cur; cur = cur->GetLink()) {
     size_t num_refs = cur->NumberOfReferences();
     for (size_t j = 0; j < num_refs; ++j) {
       mirror::Object* object = cur->GetReference(j);
@@ -1243,21 +1254,19 @@
   if (kind == kLocal) {
     IndirectReferenceTable& locals = tlsPtr_.jni_env->locals;
     result = locals.Get(ref);
-  } else if (kind == kSirtOrInvalid) {
+  } else if (kind == kHandleScopeOrInvalid) {
     // TODO: make stack indirect reference table lookup more efficient.
-    // Check if this is a local reference in the SIRT.
-    if (LIKELY(SirtContains(obj))) {
-      // Read from SIRT.
+    // Check if this is a local reference in the handle scope.
+    if (LIKELY(HandleScopeContains(obj))) {
+      // Read from handle scope.
       result = reinterpret_cast<StackReference<mirror::Object>*>(obj)->AsMirrorPtr();
       VerifyObject(result);
     } else {
       result = kInvalidIndirectRefObject;
     }
   } else if (kind == kGlobal) {
-    JavaVMExt* vm = Runtime::Current()->GetJavaVM();
-    IndirectReferenceTable& globals = vm->globals;
-    ReaderMutexLock mu(const_cast<Thread*>(this), vm->globals_lock);
-    result = const_cast<mirror::Object*>(globals.Get(ref));
+    JavaVMExt* const vm = Runtime::Current()->GetJavaVM();
+    result = vm->globals.SynchronizedGet(const_cast<Thread*>(this), &vm->globals_lock, ref);
   } else {
     DCHECK_EQ(kind, kWeakGlobal);
     result = Runtime::Current()->GetJavaVM()->DecodeWeakGlobal(const_cast<Thread*>(this), ref);
@@ -1358,11 +1367,11 @@
   bool Init(int depth)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Allocate method trace with an extra slot that will hold the PC trace
-    SirtRef<mirror::ObjectArray<mirror::Object> >
-        method_trace(self_,
-                     Runtime::Current()->GetClassLinker()->AllocObjectArray<mirror::Object>(self_,
-                                                                                            depth + 1));
-    if (method_trace.get() == nullptr) {
+    StackHandleScope<1> hs(self_);
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Handle<mirror::ObjectArray<mirror::Object>> method_trace(
+        hs.NewHandle(class_linker->AllocObjectArray<mirror::Object>(self_, depth + 1)));
+    if (method_trace.Get() == nullptr) {
       return false;
     }
     mirror::IntArray* dex_pc_trace = mirror::IntArray::Alloc(self_, depth);
@@ -1377,7 +1386,7 @@
     const char* last_no_suspend_cause =
         self_->StartAssertNoThreadSuspension("Building internal stack trace");
     CHECK(last_no_suspend_cause == nullptr) << last_no_suspend_cause;
-    method_trace_ = method_trace.get();
+    method_trace_ = method_trace.Get();
     dex_pc_trace_ = dex_pc_trace;
     return true;
   }
@@ -1424,7 +1433,7 @@
 };
 
 template<bool kTransactionActive>
-jobject Thread::CreateInternalStackTrace(const ScopedObjectAccessUnchecked& soa) const {
+jobject Thread::CreateInternalStackTrace(const ScopedObjectAccessAlreadyRunnable& soa) const {
   // Compute depth of stack
   CountStackDepthVisitor count_visitor(const_cast<Thread*>(this));
   count_visitor.WalkStack();
@@ -1447,11 +1456,14 @@
   }
   return soa.AddLocalReference<jobjectArray>(trace);
 }
-template jobject Thread::CreateInternalStackTrace<false>(const ScopedObjectAccessUnchecked& soa) const;
-template jobject Thread::CreateInternalStackTrace<true>(const ScopedObjectAccessUnchecked& soa) const;
+template jobject Thread::CreateInternalStackTrace<false>(
+    const ScopedObjectAccessAlreadyRunnable& soa) const;
+template jobject Thread::CreateInternalStackTrace<true>(
+    const ScopedObjectAccessAlreadyRunnable& soa) const;
 
-jobjectArray Thread::InternalStackTraceToStackTraceElementArray(const ScopedObjectAccess& soa,
-    jobject internal, jobjectArray output_array, int* stack_depth) {
+jobjectArray Thread::InternalStackTraceToStackTraceElementArray(
+    const ScopedObjectAccessAlreadyRunnable& soa, jobject internal, jobjectArray output_array,
+    int* stack_depth) {
   // Decode the internal stack trace into the depth, method trace and PC trace
   int32_t depth = soa.Decode<mirror::ObjectArray<mirror::Object>*>(internal)->GetLength() - 1;
 
@@ -1487,11 +1499,12 @@
     mirror::ArtMethod* method = down_cast<mirror::ArtMethod*>(method_trace->Get(i));
     MethodHelper mh(method);
     int32_t line_number;
-    SirtRef<mirror::String> class_name_object(soa.Self(), nullptr);
-    SirtRef<mirror::String> source_name_object(soa.Self(), nullptr);
+    StackHandleScope<3> hs(soa.Self());
+    auto class_name_object(hs.NewHandle<mirror::String>(nullptr));
+    auto source_name_object(hs.NewHandle<mirror::String>(nullptr));
     if (method->IsProxyMethod()) {
       line_number = -1;
-      class_name_object.reset(method->GetDeclaringClass()->GetName());
+      class_name_object.Assign(method->GetDeclaringClass()->GetName());
       // source_name_object intentionally left null for proxy methods
     } else {
       mirror::IntArray* pc_trace = down_cast<mirror::IntArray*>(method_trace->Get(depth));
@@ -1502,24 +1515,23 @@
       const char* descriptor = mh.GetDeclaringClassDescriptor();
       CHECK(descriptor != nullptr);
       std::string class_name(PrettyDescriptor(descriptor));
-      class_name_object.reset(mirror::String::AllocFromModifiedUtf8(soa.Self(), class_name.c_str()));
-      if (class_name_object.get() == nullptr) {
+      class_name_object.Assign(mirror::String::AllocFromModifiedUtf8(soa.Self(), class_name.c_str()));
+      if (class_name_object.Get() == nullptr) {
         return nullptr;
       }
       const char* source_file = mh.GetDeclaringClassSourceFile();
       if (source_file != nullptr) {
-        source_name_object.reset(mirror::String::AllocFromModifiedUtf8(soa.Self(), source_file));
-        if (source_name_object.get() == nullptr) {
+        source_name_object.Assign(mirror::String::AllocFromModifiedUtf8(soa.Self(), source_file));
+        if (source_name_object.Get() == nullptr) {
           return nullptr;
         }
       }
     }
     const char* method_name = mh.GetName();
     CHECK(method_name != nullptr);
-    SirtRef<mirror::String> method_name_object(soa.Self(),
-                                               mirror::String::AllocFromModifiedUtf8(soa.Self(),
-                                                                                     method_name));
-    if (method_name_object.get() == nullptr) {
+    Handle<mirror::String> method_name_object(
+        hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), method_name)));
+    if (method_name_object.Get() == nullptr) {
       return nullptr;
     }
     mirror::StackTraceElement* obj = mirror::StackTraceElement::Alloc(
@@ -1562,23 +1574,24 @@
                                       const char* msg) {
   DCHECK_EQ(this, Thread::Current());
   ScopedObjectAccessUnchecked soa(this);
+  StackHandleScope<5> hs(soa.Self());
   // Ensure we don't forget arguments over object allocation.
-  SirtRef<mirror::Object> saved_throw_this(this, throw_location.GetThis());
-  SirtRef<mirror::ArtMethod> saved_throw_method(this, throw_location.GetMethod());
+  Handle<mirror::Object> saved_throw_this(hs.NewHandle(throw_location.GetThis()));
+  Handle<mirror::ArtMethod> saved_throw_method(hs.NewHandle(throw_location.GetMethod()));
   // Ignore the cause throw location. TODO: should we report this as a re-throw?
   ScopedLocalRef<jobject> cause(GetJniEnv(), soa.AddLocalReference<jobject>(GetException(nullptr)));
   ClearException();
   Runtime* runtime = Runtime::Current();
 
   mirror::ClassLoader* cl = nullptr;
-  if (saved_throw_method.get() != nullptr) {
-    cl = saved_throw_method.get()->GetDeclaringClass()->GetClassLoader();
+  if (saved_throw_method.Get() != nullptr) {
+    cl = saved_throw_method.Get()->GetDeclaringClass()->GetClassLoader();
   }
-  SirtRef<mirror::ClassLoader> class_loader(this, cl);
-  SirtRef<mirror::Class>
-      exception_class(this, runtime->GetClassLinker()->FindClass(this, exception_class_descriptor,
-                                                                 class_loader));
-  if (UNLIKELY(exception_class.get() == nullptr)) {
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(cl));
+  Handle<mirror::Class> exception_class(
+      hs.NewHandle(runtime->GetClassLinker()->FindClass(this, exception_class_descriptor,
+                                                        class_loader)));
+  if (UNLIKELY(exception_class.Get() == nullptr)) {
     CHECK(IsExceptionPending());
     LOG(ERROR) << "No exception class " << PrettyDescriptor(exception_class_descriptor);
     return;
@@ -1589,12 +1602,12 @@
     return;
   }
   DCHECK(!runtime->IsStarted() || exception_class->IsThrowableClass());
-  SirtRef<mirror::Throwable> exception(this,
-                                down_cast<mirror::Throwable*>(exception_class->AllocObject(this)));
+  Handle<mirror::Throwable> exception(
+      hs.NewHandle(down_cast<mirror::Throwable*>(exception_class->AllocObject(this))));
 
   // If we couldn't allocate the exception, throw the pre-allocated out of memory exception.
-  if (exception.get() == nullptr) {
-    ThrowLocation gc_safe_throw_location(saved_throw_this.get(), saved_throw_method.get(),
+  if (exception.Get() == nullptr) {
+    ThrowLocation gc_safe_throw_location(saved_throw_this.Get(), saved_throw_method.Get(),
                                          throw_location.GetDexPc());
     SetException(gc_safe_throw_location, Runtime::Current()->GetPreAllocatedOutOfMemoryError());
     return;
@@ -1639,9 +1652,16 @@
     if (cause.get() != nullptr) {
       exception->SetCause(down_cast<mirror::Throwable*>(DecodeJObject(cause.get())));
     }
-    ThrowLocation gc_safe_throw_location(saved_throw_this.get(), saved_throw_method.get(),
+    ScopedLocalRef<jobject> trace(GetJniEnv(),
+                                  Runtime::Current()->IsActiveTransaction()
+                                      ? CreateInternalStackTrace<true>(soa)
+                                      : CreateInternalStackTrace<false>(soa));
+    if (trace.get() != nullptr) {
+      exception->SetStackState(down_cast<mirror::Throwable*>(DecodeJObject(trace.get())));
+    }
+    ThrowLocation gc_safe_throw_location(saved_throw_this.Get(), saved_throw_method.Get(),
                                          throw_location.GetDexPc());
-    SetException(gc_safe_throw_location, exception.get());
+    SetException(gc_safe_throw_location, exception.Get());
   } else {
     jvalue jv_args[2];
     size_t i = 0;
@@ -1654,11 +1674,11 @@
       jv_args[i].l = cause.get();
       ++i;
     }
-    InvokeWithJValues(soa, exception.get(), soa.EncodeMethod(exception_init_method), jv_args);
+    InvokeWithJValues(soa, exception.Get(), soa.EncodeMethod(exception_init_method), jv_args);
     if (LIKELY(!IsExceptionPending())) {
-      ThrowLocation gc_safe_throw_location(saved_throw_this.get(), saved_throw_method.get(),
+      ThrowLocation gc_safe_throw_location(saved_throw_this.Get(), saved_throw_method.Get(),
                                            throw_location.GetDexPc());
-      SetException(gc_safe_throw_location, exception.get());
+      SetException(gc_safe_throw_location, exception.Get());
     }
   }
 }
@@ -1715,7 +1735,7 @@
   DO_THREAD_OFFSET(TopOfManagedStackOffset<ptr_size>(), "top_quick_frame_method")
   DO_THREAD_OFFSET(TopOfManagedStackPcOffset<ptr_size>(), "top_quick_frame_pc")
   DO_THREAD_OFFSET(TopShadowFrameOffset<ptr_size>(), "top_shadow_frame")
-  DO_THREAD_OFFSET(TopSirtOffset<ptr_size>(), "top_sirt")
+  DO_THREAD_OFFSET(TopHandleScopeOffset<ptr_size>(), "top_handle_scope")
   DO_THREAD_OFFSET(ThreadSuspendTriggerOffset<ptr_size>(), "suspend_trigger")
 #undef DO_THREAD_OFFSET
 
@@ -1946,10 +1966,13 @@
   }
 
   void VisitShadowFrame(ShadowFrame* shadow_frame) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::ArtMethod* m = shadow_frame->GetMethod();
+    mirror::ArtMethod** method_addr = shadow_frame->GetMethodAddress();
+    visitor_(reinterpret_cast<mirror::Object**>(method_addr), 0 /*ignored*/, this);
+    mirror::ArtMethod* m = *method_addr;
+    DCHECK(m != nullptr);
     size_t num_regs = shadow_frame->NumberOfVRegs();
     if (m->IsNative() || shadow_frame->HasReferenceArray()) {
-      // SIRT for JNI or References for interpreter.
+      // Handle scope for JNI or references for interpreter.
       for (size_t reg = 0; reg < num_regs; ++reg) {
         mirror::Object* ref = shadow_frame->GetVRegReference(reg);
         if (ref != nullptr) {
@@ -1987,7 +2010,9 @@
 
  private:
   void VisitQuickFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::ArtMethod* m = GetMethod();
+    mirror::ArtMethod** method_addr = GetMethodAddress();
+    visitor_(reinterpret_cast<mirror::Object**>(method_addr), 0 /*ignored*/, this);
+    mirror::ArtMethod* m = *method_addr;
     // Process register map (which native and runtime methods don't have)
     if (!m->IsNative() && !m->IsRuntimeMethod() && !m->IsProxyMethod()) {
       const uint8_t* native_gc_map = m->GetNativeGcMap();
@@ -1999,12 +2024,14 @@
       size_t num_regs = std::min(map.RegWidth() * 8,
                                  static_cast<size_t>(code_item->registers_size_));
       if (num_regs > 0) {
-        const uint8_t* reg_bitmap = map.FindBitMap(GetNativePcOffset());
+        Runtime* runtime = Runtime::Current();
+        const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m);
+        uintptr_t native_pc_offset = m->NativePcOffset(GetCurrentQuickFramePc(), entry_point);
+        const uint8_t* reg_bitmap = map.FindBitMap(native_pc_offset);
         DCHECK(reg_bitmap != nullptr);
-        const VmapTable vmap_table(m->GetVmapTable());
-        uint32_t core_spills = m->GetCoreSpillMask();
-        uint32_t fp_spills = m->GetFpSpillMask();
-        size_t frame_size = m->GetFrameSizeInBytes();
+        const void* code_pointer = mirror::ArtMethod::EntryPointToCodePointer(entry_point);
+        const VmapTable vmap_table(m->GetVmapTable(code_pointer));
+        QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
         // For all dex registers in the bitmap
         mirror::ArtMethod** cur_quick_frame = GetCurrentQuickFrame();
         DCHECK(cur_quick_frame != nullptr);
@@ -2013,7 +2040,8 @@
           if (TestBitmap(reg, reg_bitmap)) {
             uint32_t vmap_offset;
             if (vmap_table.IsInContext(reg, kReferenceVReg, &vmap_offset)) {
-              int vmap_reg = vmap_table.ComputeRegister(core_spills, vmap_offset, kReferenceVReg);
+              int vmap_reg = vmap_table.ComputeRegister(frame_info.CoreSpillMask(), vmap_offset,
+                                                        kReferenceVReg);
               // This is sound as spilled GPRs will be word sized (ie 32 or 64bit).
               mirror::Object** ref_addr = reinterpret_cast<mirror::Object**>(GetGPRAddress(vmap_reg));
               if (*ref_addr != nullptr) {
@@ -2022,8 +2050,8 @@
             } else {
               StackReference<mirror::Object>* ref_addr =
                   reinterpret_cast<StackReference<mirror::Object>*>(
-                      GetVRegAddr(cur_quick_frame, code_item, core_spills, fp_spills, frame_size,
-                                  reg));
+                      GetVRegAddr(cur_quick_frame, code_item, frame_info.CoreSpillMask(),
+                                  frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), reg));
               mirror::Object* ref = ref_addr->AsMirrorPtr();
               if (ref != nullptr) {
                 mirror::Object* new_ref = ref;
@@ -2088,7 +2116,7 @@
   }
   tlsPtr_.jni_env->locals.VisitRoots(visitor, arg, thread_id, kRootJNILocal);
   tlsPtr_.jni_env->monitors.VisitRoots(visitor, arg, thread_id, kRootJNIMonitor);
-  SirtVisitRoots(visitor, arg, thread_id);
+  HandleScopeVisitRoots(visitor, arg, thread_id);
   if (tlsPtr_.debug_invoke_req != nullptr) {
     tlsPtr_.debug_invoke_req->VisitRoots(visitor, arg, thread_id, kRootDebugger);
   }
@@ -2124,7 +2152,7 @@
 }
 
 void Thread::VerifyStackImpl() {
-  UniquePtr<Context> context(Context::Create());
+  std::unique_ptr<Context> context(Context::Create());
   RootCallbackVisitor visitorToCallback(VerifyRoot, Runtime::Current()->GetHeap(), GetThreadId());
   ReferenceMapVisitor<RootCallbackVisitor> mapper(this, context.get(), visitorToCallback);
   mapper.WalkStack();
diff --git a/runtime/thread.h b/runtime/thread.h
index 8c17082..9a7cb48 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -21,6 +21,7 @@
 #include <deque>
 #include <iosfwd>
 #include <list>
+#include <memory>
 #include <string>
 
 #include "base/macros.h"
@@ -31,15 +32,14 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/allocator/rosalloc.h"
 #include "globals.h"
+#include "handle_scope.h"
 #include "jvalue.h"
 #include "object_callbacks.h"
 #include "offsets.h"
 #include "runtime_stats.h"
 #include "stack.h"
-#include "stack_indirect_reference_table.h"
 #include "thread_state.h"
 #include "throw_location.h"
-#include "UniquePtr.h"
 
 namespace art {
 
@@ -72,8 +72,7 @@
 struct JNIEnvExt;
 class Monitor;
 class Runtime;
-class ScopedObjectAccess;
-class ScopedObjectAccessUnchecked;
+class ScopedObjectAccessAlreadyRunnable;
 class ShadowFrame;
 struct SingleStepControl;
 class Thread;
@@ -98,11 +97,16 @@
   // Space to throw a StackOverflowError in.
   // TODO: shrink reserved space, in particular for 64bit.
 #if defined(__x86_64__)
-  static constexpr size_t kStackOverflowReservedBytes = 24 * KB;
+  static constexpr size_t kStackOverflowReservedBytes = 32 * KB;
 #elif defined(__aarch64__)
   // Worst-case, we would need about 2.6x the amount of x86_64 for many more registers.
   // But this one works rather well.
   static constexpr size_t kStackOverflowReservedBytes = 32 * KB;
+#elif defined(__i386__)
+  // TODO: Bumped to workaround regression (http://b/14982147) Specifically to fix:
+  // test-art-host-run-test-interpreter-018-stack-overflow
+  // test-art-host-run-test-interpreter-107-int-math2
+  static constexpr size_t kStackOverflowReservedBytes = 24 * KB;
 #else
   static constexpr size_t kStackOverflowReservedBytes = 16 * KB;
 #endif
@@ -135,12 +139,12 @@
 
   static Thread* Current();
 
-  static Thread* FromManagedThread(const ScopedObjectAccessUnchecked& ts,
+  static Thread* FromManagedThread(const ScopedObjectAccessAlreadyRunnable& ts,
                                    mirror::Object* thread_peer)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static Thread* FromManagedThread(const ScopedObjectAccessUnchecked& ts, jobject thread)
+  static Thread* FromManagedThread(const ScopedObjectAccessAlreadyRunnable& ts, jobject thread)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -271,7 +275,7 @@
   }
 
   // Returns the java.lang.Thread's name, or NULL if this Thread* doesn't have a peer.
-  mirror::String* GetThreadName(const ScopedObjectAccessUnchecked& ts) const
+  mirror::String* GetThreadName(const ScopedObjectAccessAlreadyRunnable& ts) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Sets 'name' to the java.lang.Thread's name. This requires no transition to managed code,
@@ -392,11 +396,11 @@
   // Convert a jobject into a Object*
   mirror::Object* DecodeJObject(jobject obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::Object* GetMonitorEnterObject() const {
+  mirror::Object* GetMonitorEnterObject() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return tlsPtr_.monitor_enter_object;
   }
 
-  void SetMonitorEnterObject(mirror::Object* obj) {
+  void SetMonitorEnterObject(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     tlsPtr_.monitor_enter_object = obj;
   }
 
@@ -453,15 +457,16 @@
   // Create the internal representation of a stack trace, that is more time
   // and space efficient to compute than the StackTraceElement[].
   template<bool kTransactionActive>
-  jobject CreateInternalStackTrace(const ScopedObjectAccessUnchecked& soa) const
+  jobject CreateInternalStackTrace(const ScopedObjectAccessAlreadyRunnable& soa) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Convert an internal stack trace representation (returned by CreateInternalStackTrace) to a
   // StackTraceElement[]. If output_array is NULL, a new array is created, otherwise as many
   // frames as will fit are written into the given array. If stack_depth is non-NULL, it's updated
   // with the number of valid frames in the returned array.
-  static jobjectArray InternalStackTraceToStackTraceElementArray(const ScopedObjectAccess& soa,
-      jobject internal, jobjectArray output_array = nullptr, int* stack_depth = nullptr)
+  static jobjectArray InternalStackTraceToStackTraceElementArray(
+      const ScopedObjectAccessAlreadyRunnable& soa, jobject internal,
+      jobjectArray output_array = nullptr, int* stack_depth = nullptr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void VisitRoots(RootCallback* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -648,35 +653,40 @@
     return tlsPtr_.managed_stack.NumJniShadowFrameReferences();
   }
 
-  // Number of references in SIRTs on this thread.
-  size_t NumSirtReferences();
+  // Number of references in handle scope on this thread.
+  size_t NumHandleReferences();
 
-  // Number of references allocated in SIRTs & JNI shadow frames on this thread.
+  // Number of references allocated in handle scopes & JNI shadow frames on this thread.
   size_t NumStackReferences() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return NumSirtReferences() + NumJniShadowFrameReferences();
+    return NumHandleReferences() + NumJniShadowFrameReferences();
   };
 
   // Is the given obj in this thread's stack indirect reference table?
-  bool SirtContains(jobject obj) const;
+  bool HandleScopeContains(jobject obj) const;
 
-  void SirtVisitRoots(RootCallback* visitor, void* arg, uint32_t thread_id)
+  void HandleScopeVisitRoots(RootCallback* visitor, void* arg, uint32_t thread_id)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void PushSirt(StackIndirectReferenceTable* sirt) {
-    sirt->SetLink(tlsPtr_.top_sirt);
-    tlsPtr_.top_sirt = sirt;
+  HandleScope* GetTopHandleScope() {
+    return tlsPtr_.top_handle_scope;
   }
 
-  StackIndirectReferenceTable* PopSirt() {
-    StackIndirectReferenceTable* sirt = tlsPtr_.top_sirt;
-    DCHECK(sirt != NULL);
-    tlsPtr_.top_sirt = tlsPtr_.top_sirt->GetLink();
-    return sirt;
+  void PushHandleScope(HandleScope* handle_scope) {
+    handle_scope->SetLink(tlsPtr_.top_handle_scope);
+    tlsPtr_.top_handle_scope = handle_scope;
+  }
+
+  HandleScope* PopHandleScope() {
+    HandleScope* handle_scope = tlsPtr_.top_handle_scope;
+    DCHECK(handle_scope != nullptr);
+    tlsPtr_.top_handle_scope = tlsPtr_.top_handle_scope->GetLink();
+    return handle_scope;
   }
 
   template<size_t pointer_size>
-  static ThreadOffset<pointer_size> TopSirtOffset() {
-    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, top_sirt));
+  static ThreadOffset<pointer_size> TopHandleScopeOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
+                                                                top_handle_scope));
   }
 
   DebugInvokeReq* GetInvokeReq() const {
@@ -950,7 +960,7 @@
       managed_stack(), suspend_trigger(nullptr), jni_env(nullptr), self(nullptr), opeer(nullptr),
       jpeer(nullptr), stack_begin(nullptr), stack_size(0), throw_location(),
       stack_trace_sample(nullptr), wait_next(nullptr), monitor_enter_object(nullptr),
-      top_sirt(nullptr), class_loader_override(nullptr), long_jump_context(nullptr),
+      top_handle_scope(nullptr), class_loader_override(nullptr), long_jump_context(nullptr),
       instrumentation_stack(nullptr), debug_invoke_req(nullptr), single_step_control(nullptr),
       deoptimization_shadow_frame(nullptr), name(nullptr), pthread_self(0),
       last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr),
@@ -1006,8 +1016,8 @@
     // If we're blocked in MonitorEnter, this is the object we're trying to lock.
     mirror::Object* monitor_enter_object;
 
-    // Top of linked list of stack indirect reference tables or NULL for none.
-    StackIndirectReferenceTable* top_sirt;
+    // Top of linked list of handle scopes or nullptr for none.
+    HandleScope* top_handle_scope;
 
     // Needed to get the right ClassLoader in JNI_OnLoad, but also
     // useful for testing.
@@ -1035,9 +1045,6 @@
     // A cached pthread_t for the pthread underlying this Thread*.
     pthread_t pthread_self;
 
-    // Support for Mutex lock hierarchy bug detection.
-    BaseMutex* held_mutexes[kLockLevelCount];
-
     // If no_thread_suspension_ is > 0, what is causing that assertion.
     const char* last_no_thread_suspension_cause;
 
@@ -1064,6 +1071,9 @@
     // Thread-local allocation stack data/routines.
     mirror::Object** thread_local_alloc_stack_top;
     mirror::Object** thread_local_alloc_stack_end;
+
+    // Support for Mutex lock hierarchy bug detection.
+    BaseMutex* held_mutexes[kLockLevelCount];
   } tlsPtr_;
 
   // Guards the 'interrupted_' and 'wait_monitor_' members.
@@ -1082,6 +1092,7 @@
   friend class Runtime;  // For CreatePeer.
   friend class ScopedThreadStateChange;
   friend class SignalCatcher;  // For SetStateUnsafe.
+  friend class StubTest;  // For accessing entrypoints.
   friend class ThreadList;  // For ~Thread and Destroy.
 
   DISALLOW_COPY_AND_ASSIGN(Thread);
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 4eb580b..388c9b4 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -40,8 +40,7 @@
 namespace art {
 
 ThreadList::ThreadList()
-    : allocated_ids_lock_("allocated thread ids lock"),
-      suspend_all_count_(0), debug_suspend_all_count_(0),
+    : suspend_all_count_(0), debug_suspend_all_count_(0),
       thread_exit_cond_("thread exit condition variable", *Locks::thread_list_lock_) {
   CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1)));
 }
@@ -650,7 +649,7 @@
         // can happen if the debugger lets go while a SIGQUIT thread
         // dump event is pending (assuming SignalCatcher was resumed for
         // just long enough to try to grab the thread-suspend lock).
-        LOG(DEBUG) << *self << " still suspended after undo "
+        LOG(WARNING) << *self << " still suspended after undo "
                    << "(suspend count=" << self->GetSuspendCount() << ")";
       }
     }
@@ -754,7 +753,7 @@
   if (VLOG_IS_ON(threads)) {
     std::ostringstream oss;
     self->ShortDump(oss);  // We don't hold the mutator_lock_ yet and so cannot call Dump.
-    LOG(INFO) << "ThreadList::Register() " << *self  << "\n" << oss;
+    LOG(INFO) << "ThreadList::Register() " << *self  << "\n" << oss.str();
   }
 
   // Atomically add self to the thread list and make its thread_suspend_count_ reflect ongoing
@@ -849,7 +848,7 @@
 }
 
 uint32_t ThreadList::AllocThreadId(Thread* self) {
-  MutexLock mu(self, allocated_ids_lock_);
+  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
   for (size_t i = 0; i < allocated_ids_.size(); ++i) {
     if (!allocated_ids_[i]) {
       allocated_ids_.set(i);
@@ -861,7 +860,7 @@
 }
 
 void ThreadList::ReleaseThreadId(Thread* self, uint32_t id) {
-  MutexLock mu(self, allocated_ids_lock_);
+  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
   --id;  // Zero is reserved to mean "invalid".
   DCHECK(allocated_ids_[id]) << id;
   allocated_ids_.reset(id);
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index a574340..d46987a 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -132,7 +132,7 @@
 
  private:
   uint32_t AllocThreadId(Thread* self);
-  void ReleaseThreadId(Thread* self, uint32_t id) LOCKS_EXCLUDED(allocated_ids_lock_);
+  void ReleaseThreadId(Thread* self, uint32_t id) LOCKS_EXCLUDED(Locks::allocated_thread_ids_lock_);
 
   bool Contains(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_);
   bool Contains(pid_t tid) EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_);
@@ -151,8 +151,7 @@
       LOCKS_EXCLUDED(Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);
 
-  mutable Mutex allocated_ids_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  std::bitset<kMaxThreadId> allocated_ids_ GUARDED_BY(allocated_ids_lock_);
+  std::bitset<kMaxThreadId> allocated_ids_ GUARDED_BY(Locks::allocated_thread_ids_lock_);
 
   // The actual list of all threads.
   std::list<Thread*> list_ GUARDED_BY(Locks::thread_list_lock_);
diff --git a/runtime/thread_pool.h b/runtime/thread_pool.h
index b8735a3..c816c84 100644
--- a/runtime/thread_pool.h
+++ b/runtime/thread_pool.h
@@ -53,7 +53,7 @@
 
   ThreadPool* const thread_pool_;
   const std::string name_;
-  UniquePtr<MemMap> stack_;
+  std::unique_ptr<MemMap> stack_;
   pthread_t pthread_;
 
  private:
@@ -96,7 +96,7 @@
   void SetMaxActiveWorkers(size_t threads);
 
  protected:
-  // Get a task to run, blocks if there are no tasks left
+  // Get a task to run; blocks if there are no tasks left.
   virtual Task* GetTask(Thread* self);
 
   // Try to get a task, returning NULL if there is none available.
diff --git a/runtime/thread_pool_test.cc b/runtime/thread_pool_test.cc
index c1a1ad7..292c94f 100644
--- a/runtime/thread_pool_test.cc
+++ b/runtime/thread_pool_test.cc
@@ -69,7 +69,7 @@
   // Wait for tasks to complete.
   thread_pool.Wait(self, true, false);
   // Make sure that we finished all the work.
-  EXPECT_EQ(num_tasks, count);
+  EXPECT_EQ(num_tasks, count.LoadSequentiallyConsistent());
 }
 
 TEST_F(ThreadPoolTest, StopStart) {
@@ -82,7 +82,7 @@
   }
   usleep(200);
   // Check that no threads started prematurely.
-  EXPECT_EQ(0, count);
+  EXPECT_EQ(0, count.LoadSequentiallyConsistent());
   // Signal the threads to start processing tasks.
   thread_pool.StartWorkers(self);
   usleep(200);
@@ -91,10 +91,11 @@
   thread_pool.AddTask(self, new CountTask(&bad_count));
   usleep(200);
   // Ensure that the task added after the workers were stopped doesn't get run.
-  EXPECT_EQ(0, bad_count);
+  EXPECT_EQ(0, bad_count.LoadSequentiallyConsistent());
   // Allow tasks to finish up and delete themselves.
   thread_pool.StartWorkers(self);
-  while (count.Load() != num_tasks && bad_count.Load() != 1) {
+  while (count.LoadSequentiallyConsistent() != num_tasks &&
+      bad_count.LoadSequentiallyConsistent() != 1) {
     usleep(200);
   }
   thread_pool.StopWorkers(self);
@@ -135,7 +136,7 @@
   thread_pool.AddTask(self, new TreeTask(&thread_pool, &count, depth));
   thread_pool.StartWorkers(self);
   thread_pool.Wait(self, true, false);
-  EXPECT_EQ((1 << depth) - 1, count);
+  EXPECT_EQ((1 << depth) - 1, count.LoadSequentiallyConsistent());
 }
 
 }  // namespace art
diff --git a/runtime/trace.cc b/runtime/trace.cc
index b85eb7e..d53b369 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -119,7 +119,7 @@
 
 Trace* volatile Trace::the_trace_ = NULL;
 pthread_t Trace::sampling_pthread_ = 0U;
-UniquePtr<std::vector<mirror::ArtMethod*> > Trace::temp_stack_trace_;
+std::unique_ptr<std::vector<mirror::ArtMethod*>> Trace::temp_stack_trace_;
 
 static mirror::ArtMethod* DecodeTraceMethodId(uint32_t tmid) {
   return reinterpret_cast<mirror::ArtMethod*>(tmid & ~kTraceMethodActionMask);
@@ -339,7 +339,7 @@
   runtime->GetThreadList()->SuspendAll();
 
   // Open trace file if not going directly to ddms.
-  UniquePtr<File> trace_file;
+  std::unique_ptr<File> trace_file;
   if (!direct_to_ddms) {
     if (trace_fd < 0) {
       trace_file.reset(OS::CreateEmptyFile(trace_filename));
diff --git a/runtime/trace.h b/runtime/trace.h
index bf4995a..08da16f 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_TRACE_H_
 #define ART_RUNTIME_TRACE_H_
 
+#include <memory>
 #include <ostream>
 #include <set>
 #include <string>
@@ -27,7 +28,6 @@
 #include "instrumentation.h"
 #include "os.h"
 #include "safe_map.h"
-#include "UniquePtr.h"
 
 namespace art {
 
@@ -140,13 +140,13 @@
   static pthread_t sampling_pthread_;
 
   // Used to remember an unused stack trace to avoid re-allocation during sampling.
-  static UniquePtr<std::vector<mirror::ArtMethod*> > temp_stack_trace_;
+  static std::unique_ptr<std::vector<mirror::ArtMethod*>> temp_stack_trace_;
 
   // File to write trace data out to, NULL if direct to ddms.
-  UniquePtr<File> trace_file_;
+  std::unique_ptr<File> trace_file_;
 
   // Buffer to store trace data.
-  UniquePtr<uint8_t> buf_;
+  std::unique_ptr<uint8_t> buf_;
 
   // Flags enabling extra tracing of things such as alloc counts.
   const int flags_;
diff --git a/runtime/transaction_test.cc b/runtime/transaction_test.cc
index 1dc2da0..3645ed2 100644
--- a/runtime/transaction_test.cc
+++ b/runtime/transaction_test.cc
@@ -27,56 +27,57 @@
 
 TEST_F(TransactionTest, Object_class) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::Class> sirt_klass(soa.Self(),
-                                    class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;"));
-  ASSERT_TRUE(sirt_klass.get() != nullptr);
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::Class> h_klass(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;")));
+  ASSERT_TRUE(h_klass.Get() != nullptr);
 
   Transaction transaction;
   Runtime::Current()->EnterTransactionMode(&transaction);
-  SirtRef<mirror::Object> sirt_obj(soa.Self(), sirt_klass->AllocObject(soa.Self()));
-  ASSERT_TRUE(sirt_obj.get() != nullptr);
-  ASSERT_EQ(sirt_obj->GetClass(), sirt_klass.get());
+  Handle<mirror::Object> h_obj(hs.NewHandle(h_klass->AllocObject(soa.Self())));
+  ASSERT_TRUE(h_obj.Get() != nullptr);
+  ASSERT_EQ(h_obj->GetClass(), h_klass.Get());
   Runtime::Current()->ExitTransactionMode();
 
   // Aborting transaction must not clear the Object::class field.
   transaction.Abort();
-  EXPECT_EQ(sirt_obj->GetClass(), sirt_klass.get());
+  EXPECT_EQ(h_obj->GetClass(), h_klass.Get());
 }
 
 TEST_F(TransactionTest, Object_monitor) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::Class> sirt_klass(soa.Self(),
-                                    class_linker_->FindSystemClass(soa.Self(),
-                                                                   "Ljava/lang/Object;"));
-  ASSERT_TRUE(sirt_klass.get() != nullptr);
-  SirtRef<mirror::Object> sirt_obj(soa.Self(), sirt_klass->AllocObject(soa.Self()));
-  ASSERT_TRUE(sirt_obj.get() != nullptr);
-  ASSERT_EQ(sirt_obj->GetClass(), sirt_klass.get());
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::Class> h_klass(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;")));
+  ASSERT_TRUE(h_klass.Get() != nullptr);
+  Handle<mirror::Object> h_obj(hs.NewHandle(h_klass->AllocObject(soa.Self())));
+  ASSERT_TRUE(h_obj.Get() != nullptr);
+  ASSERT_EQ(h_obj->GetClass(), h_klass.Get());
 
   // Lock object's monitor outside the transaction.
-  sirt_obj->MonitorEnter(soa.Self());
-  uint32_t old_lock_word = sirt_obj->GetLockWord(false).GetValue();
+  h_obj->MonitorEnter(soa.Self());
+  uint32_t old_lock_word = h_obj->GetLockWord(false).GetValue();
 
   Transaction transaction;
   Runtime::Current()->EnterTransactionMode(&transaction);
   // Unlock object's monitor inside the transaction.
-  sirt_obj->MonitorExit(soa.Self());
-  uint32_t new_lock_word = sirt_obj->GetLockWord(false).GetValue();
+  h_obj->MonitorExit(soa.Self());
+  uint32_t new_lock_word = h_obj->GetLockWord(false).GetValue();
   Runtime::Current()->ExitTransactionMode();
 
   // Aborting transaction must not clear the Object::class field.
   transaction.Abort();
-  uint32_t aborted_lock_word = sirt_obj->GetLockWord(false).GetValue();
+  uint32_t aborted_lock_word = h_obj->GetLockWord(false).GetValue();
   EXPECT_NE(old_lock_word, new_lock_word);
   EXPECT_EQ(aborted_lock_word, new_lock_word);
 }
 
 TEST_F(TransactionTest, Array_length) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::Class> sirt_klass(soa.Self(),
-                                    class_linker_->FindSystemClass(soa.Self(),
-                                                                   "[Ljava/lang/Object;"));
-  ASSERT_TRUE(sirt_klass.get() != nullptr);
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::Class> h_klass(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;")));
+  ASSERT_TRUE(h_klass.Get() != nullptr);
 
   constexpr int32_t kArraySize = 2;
 
@@ -84,304 +85,301 @@
   Runtime::Current()->EnterTransactionMode(&transaction);
 
   // Allocate an array during transaction.
-  SirtRef<mirror::Array> sirt_obj(soa.Self(),
-                                  mirror::Array::Alloc<true>(soa.Self(), sirt_klass.get(),
-                                                             kArraySize,
-                                                             sirt_klass->GetComponentSize(),
-                                                             Runtime::Current()->GetHeap()->GetCurrentAllocator()));
-  ASSERT_TRUE(sirt_obj.get() != nullptr);
-  ASSERT_EQ(sirt_obj->GetClass(), sirt_klass.get());
+  Handle<mirror::Array> h_obj(
+      hs.NewHandle(
+          mirror::Array::Alloc<true>(soa.Self(), h_klass.Get(), kArraySize,
+                                     h_klass->GetComponentSize(),
+                                     Runtime::Current()->GetHeap()->GetCurrentAllocator())));
+  ASSERT_TRUE(h_obj.Get() != nullptr);
+  ASSERT_EQ(h_obj->GetClass(), h_klass.Get());
   Runtime::Current()->ExitTransactionMode();
 
   // Aborting transaction must not clear the Object::class field.
   transaction.Abort();
-  EXPECT_EQ(sirt_obj->GetLength(), kArraySize);
+  EXPECT_EQ(h_obj->GetLength(), kArraySize);
 }
 
 TEST_F(TransactionTest, StaticFieldsTest) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::ClassLoader> class_loader(
-      soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("Transaction")));
-  ASSERT_TRUE(class_loader.get() != nullptr);
+  StackHandleScope<4> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(LoadDex("Transaction"))));
+  ASSERT_TRUE(class_loader.Get() != nullptr);
 
-  SirtRef<mirror::Class> sirt_klass(soa.Self(),
-                                    class_linker_->FindClass(soa.Self(), "LStaticFieldsTest;",
-                                                             class_loader));
-  ASSERT_TRUE(sirt_klass.get() != nullptr);
-  class_linker_->EnsureInitialized(sirt_klass, true, true);
-  ASSERT_TRUE(sirt_klass->IsInitialized());
+  Handle<mirror::Class> h_klass(
+      hs.NewHandle(class_linker_->FindClass(soa.Self(), "LStaticFieldsTest;", class_loader)));
+  ASSERT_TRUE(h_klass.Get() != nullptr);
+  class_linker_->EnsureInitialized(h_klass, true, true);
+  ASSERT_TRUE(h_klass->IsInitialized());
 
   // Lookup fields.
-  mirror::ArtField* booleanField = sirt_klass->FindDeclaredStaticField("booleanField", "Z");
+  mirror::ArtField* booleanField = h_klass->FindDeclaredStaticField("booleanField", "Z");
   ASSERT_TRUE(booleanField != nullptr);
   ASSERT_EQ(FieldHelper(booleanField).GetTypeAsPrimitiveType(), Primitive::kPrimBoolean);
-  ASSERT_EQ(booleanField->GetBoolean(sirt_klass.get()), false);
+  ASSERT_EQ(booleanField->GetBoolean(h_klass.Get()), false);
 
-  mirror::ArtField* byteField = sirt_klass->FindDeclaredStaticField("byteField", "B");
+  mirror::ArtField* byteField = h_klass->FindDeclaredStaticField("byteField", "B");
   ASSERT_TRUE(byteField != nullptr);
   ASSERT_EQ(FieldHelper(byteField).GetTypeAsPrimitiveType(), Primitive::kPrimByte);
-  ASSERT_EQ(byteField->GetByte(sirt_klass.get()), 0);
+  ASSERT_EQ(byteField->GetByte(h_klass.Get()), 0);
 
-  mirror::ArtField* charField = sirt_klass->FindDeclaredStaticField("charField", "C");
+  mirror::ArtField* charField = h_klass->FindDeclaredStaticField("charField", "C");
   ASSERT_TRUE(charField != nullptr);
   ASSERT_EQ(FieldHelper(charField).GetTypeAsPrimitiveType(), Primitive::kPrimChar);
-  ASSERT_EQ(charField->GetChar(sirt_klass.get()), 0u);
+  ASSERT_EQ(charField->GetChar(h_klass.Get()), 0u);
 
-  mirror::ArtField* shortField = sirt_klass->FindDeclaredStaticField("shortField", "S");
+  mirror::ArtField* shortField = h_klass->FindDeclaredStaticField("shortField", "S");
   ASSERT_TRUE(shortField != nullptr);
   ASSERT_EQ(FieldHelper(shortField).GetTypeAsPrimitiveType(), Primitive::kPrimShort);
-  ASSERT_EQ(shortField->GetShort(sirt_klass.get()), 0);
+  ASSERT_EQ(shortField->GetShort(h_klass.Get()), 0);
 
-  mirror::ArtField* intField = sirt_klass->FindDeclaredStaticField("intField", "I");
+  mirror::ArtField* intField = h_klass->FindDeclaredStaticField("intField", "I");
   ASSERT_TRUE(intField != nullptr);
   ASSERT_EQ(FieldHelper(intField).GetTypeAsPrimitiveType(), Primitive::kPrimInt);
-  ASSERT_EQ(intField->GetInt(sirt_klass.get()), 0);
+  ASSERT_EQ(intField->GetInt(h_klass.Get()), 0);
 
-  mirror::ArtField* longField = sirt_klass->FindDeclaredStaticField("longField", "J");
+  mirror::ArtField* longField = h_klass->FindDeclaredStaticField("longField", "J");
   ASSERT_TRUE(longField != nullptr);
   ASSERT_EQ(FieldHelper(longField).GetTypeAsPrimitiveType(), Primitive::kPrimLong);
-  ASSERT_EQ(longField->GetLong(sirt_klass.get()), static_cast<int64_t>(0));
+  ASSERT_EQ(longField->GetLong(h_klass.Get()), static_cast<int64_t>(0));
 
-  mirror::ArtField* floatField = sirt_klass->FindDeclaredStaticField("floatField", "F");
+  mirror::ArtField* floatField = h_klass->FindDeclaredStaticField("floatField", "F");
   ASSERT_TRUE(floatField != nullptr);
   ASSERT_EQ(FieldHelper(floatField).GetTypeAsPrimitiveType(), Primitive::kPrimFloat);
-  ASSERT_EQ(floatField->GetFloat(sirt_klass.get()), static_cast<float>(0.0f));
+  ASSERT_EQ(floatField->GetFloat(h_klass.Get()), static_cast<float>(0.0f));
 
-  mirror::ArtField* doubleField = sirt_klass->FindDeclaredStaticField("doubleField", "D");
+  mirror::ArtField* doubleField = h_klass->FindDeclaredStaticField("doubleField", "D");
   ASSERT_TRUE(doubleField != nullptr);
   ASSERT_EQ(FieldHelper(doubleField).GetTypeAsPrimitiveType(), Primitive::kPrimDouble);
-  ASSERT_EQ(doubleField->GetDouble(sirt_klass.get()), static_cast<double>(0.0));
+  ASSERT_EQ(doubleField->GetDouble(h_klass.Get()), static_cast<double>(0.0));
 
-  mirror::ArtField* objectField = sirt_klass->FindDeclaredStaticField("objectField",
+  mirror::ArtField* objectField = h_klass->FindDeclaredStaticField("objectField",
                                                                       "Ljava/lang/Object;");
   ASSERT_TRUE(objectField != nullptr);
   ASSERT_EQ(FieldHelper(objectField).GetTypeAsPrimitiveType(), Primitive::kPrimNot);
-  ASSERT_EQ(objectField->GetObject(sirt_klass.get()), nullptr);
+  ASSERT_EQ(objectField->GetObject(h_klass.Get()), nullptr);
 
   // Create a java.lang.Object instance to set objectField.
-  SirtRef<mirror::Class> object_klass(soa.Self(),
-                                      class_linker_->FindSystemClass(soa.Self(),
-                                                                     "Ljava/lang/Object;"));
-  ASSERT_TRUE(object_klass.get() != nullptr);
-  SirtRef<mirror::Object> sirt_obj(soa.Self(), sirt_klass->AllocObject(soa.Self()));
-  ASSERT_TRUE(sirt_obj.get() != nullptr);
-  ASSERT_EQ(sirt_obj->GetClass(), sirt_klass.get());
+  Handle<mirror::Class> object_klass(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;")));
+  ASSERT_TRUE(object_klass.Get() != nullptr);
+  Handle<mirror::Object> h_obj(hs.NewHandle(h_klass->AllocObject(soa.Self())));
+  ASSERT_TRUE(h_obj.Get() != nullptr);
+  ASSERT_EQ(h_obj->GetClass(), h_klass.Get());
 
   // Modify fields inside transaction and abort it.
   Transaction transaction;
   Runtime::Current()->EnterTransactionMode(&transaction);
-  booleanField->SetBoolean<true>(sirt_klass.get(), true);
-  byteField->SetByte<true>(sirt_klass.get(), 1);
-  charField->SetChar<true>(sirt_klass.get(), 1u);
-  shortField->SetShort<true>(sirt_klass.get(), 1);
-  intField->SetInt<true>(sirt_klass.get(), 1);
-  longField->SetLong<true>(sirt_klass.get(), 1);
-  floatField->SetFloat<true>(sirt_klass.get(), 1.0);
-  doubleField->SetDouble<true>(sirt_klass.get(), 1.0);
-  objectField->SetObject<true>(sirt_klass.get(), sirt_obj.get());
+  booleanField->SetBoolean<true>(h_klass.Get(), true);
+  byteField->SetByte<true>(h_klass.Get(), 1);
+  charField->SetChar<true>(h_klass.Get(), 1u);
+  shortField->SetShort<true>(h_klass.Get(), 1);
+  intField->SetInt<true>(h_klass.Get(), 1);
+  longField->SetLong<true>(h_klass.Get(), 1);
+  floatField->SetFloat<true>(h_klass.Get(), 1.0);
+  doubleField->SetDouble<true>(h_klass.Get(), 1.0);
+  objectField->SetObject<true>(h_klass.Get(), h_obj.Get());
   Runtime::Current()->ExitTransactionMode();
   transaction.Abort();
 
   // Check values have properly been restored to their original (default) value.
-  EXPECT_EQ(booleanField->GetBoolean(sirt_klass.get()), false);
-  EXPECT_EQ(byteField->GetByte(sirt_klass.get()), 0);
-  EXPECT_EQ(charField->GetChar(sirt_klass.get()), 0u);
-  EXPECT_EQ(shortField->GetShort(sirt_klass.get()), 0);
-  EXPECT_EQ(intField->GetInt(sirt_klass.get()), 0);
-  EXPECT_EQ(longField->GetLong(sirt_klass.get()), static_cast<int64_t>(0));
-  EXPECT_EQ(floatField->GetFloat(sirt_klass.get()), static_cast<float>(0.0f));
-  EXPECT_EQ(doubleField->GetDouble(sirt_klass.get()), static_cast<double>(0.0));
-  EXPECT_EQ(objectField->GetObject(sirt_klass.get()), nullptr);
+  EXPECT_EQ(booleanField->GetBoolean(h_klass.Get()), false);
+  EXPECT_EQ(byteField->GetByte(h_klass.Get()), 0);
+  EXPECT_EQ(charField->GetChar(h_klass.Get()), 0u);
+  EXPECT_EQ(shortField->GetShort(h_klass.Get()), 0);
+  EXPECT_EQ(intField->GetInt(h_klass.Get()), 0);
+  EXPECT_EQ(longField->GetLong(h_klass.Get()), static_cast<int64_t>(0));
+  EXPECT_EQ(floatField->GetFloat(h_klass.Get()), static_cast<float>(0.0f));
+  EXPECT_EQ(doubleField->GetDouble(h_klass.Get()), static_cast<double>(0.0));
+  EXPECT_EQ(objectField->GetObject(h_klass.Get()), nullptr);
 }
 
 TEST_F(TransactionTest, InstanceFieldsTest) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::ClassLoader> class_loader(
-      soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("Transaction")));
-  ASSERT_TRUE(class_loader.get() != nullptr);
+  StackHandleScope<5> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(LoadDex("Transaction"))));
+  ASSERT_TRUE(class_loader.Get() != nullptr);
 
-  SirtRef<mirror::Class> sirt_klass(soa.Self(),
-                                    class_linker_->FindClass(soa.Self(), "LInstanceFieldsTest;",
-                                                             class_loader));
-  ASSERT_TRUE(sirt_klass.get() != nullptr);
-  class_linker_->EnsureInitialized(sirt_klass, true, true);
-  ASSERT_TRUE(sirt_klass->IsInitialized());
+  Handle<mirror::Class> h_klass(
+      hs.NewHandle(class_linker_->FindClass(soa.Self(), "LInstanceFieldsTest;", class_loader)));
+  ASSERT_TRUE(h_klass.Get() != nullptr);
+  class_linker_->EnsureInitialized(h_klass, true, true);
+  ASSERT_TRUE(h_klass->IsInitialized());
 
   // Allocate an InstanceFieldTest object.
-  SirtRef<mirror::Object> sirt_instance(soa.Self(), sirt_klass->AllocObject(soa.Self()));
-  ASSERT_TRUE(sirt_instance.get() != nullptr);
+  Handle<mirror::Object> h_instance(hs.NewHandle(h_klass->AllocObject(soa.Self())));
+  ASSERT_TRUE(h_instance.Get() != nullptr);
 
   // Lookup fields.
-  mirror::ArtField* booleanField = sirt_klass->FindDeclaredInstanceField("booleanField", "Z");
+  mirror::ArtField* booleanField = h_klass->FindDeclaredInstanceField("booleanField", "Z");
   ASSERT_TRUE(booleanField != nullptr);
   ASSERT_EQ(FieldHelper(booleanField).GetTypeAsPrimitiveType(), Primitive::kPrimBoolean);
-  ASSERT_EQ(booleanField->GetBoolean(sirt_instance.get()), false);
+  ASSERT_EQ(booleanField->GetBoolean(h_instance.Get()), false);
 
-  mirror::ArtField* byteField = sirt_klass->FindDeclaredInstanceField("byteField", "B");
+  mirror::ArtField* byteField = h_klass->FindDeclaredInstanceField("byteField", "B");
   ASSERT_TRUE(byteField != nullptr);
   ASSERT_EQ(FieldHelper(byteField).GetTypeAsPrimitiveType(), Primitive::kPrimByte);
-  ASSERT_EQ(byteField->GetByte(sirt_instance.get()), 0);
+  ASSERT_EQ(byteField->GetByte(h_instance.Get()), 0);
 
-  mirror::ArtField* charField = sirt_klass->FindDeclaredInstanceField("charField", "C");
+  mirror::ArtField* charField = h_klass->FindDeclaredInstanceField("charField", "C");
   ASSERT_TRUE(charField != nullptr);
   ASSERT_EQ(FieldHelper(charField).GetTypeAsPrimitiveType(), Primitive::kPrimChar);
-  ASSERT_EQ(charField->GetChar(sirt_instance.get()), 0u);
+  ASSERT_EQ(charField->GetChar(h_instance.Get()), 0u);
 
-  mirror::ArtField* shortField = sirt_klass->FindDeclaredInstanceField("shortField", "S");
+  mirror::ArtField* shortField = h_klass->FindDeclaredInstanceField("shortField", "S");
   ASSERT_TRUE(shortField != nullptr);
   ASSERT_EQ(FieldHelper(shortField).GetTypeAsPrimitiveType(), Primitive::kPrimShort);
-  ASSERT_EQ(shortField->GetShort(sirt_instance.get()), 0);
+  ASSERT_EQ(shortField->GetShort(h_instance.Get()), 0);
 
-  mirror::ArtField* intField = sirt_klass->FindDeclaredInstanceField("intField", "I");
+  mirror::ArtField* intField = h_klass->FindDeclaredInstanceField("intField", "I");
   ASSERT_TRUE(intField != nullptr);
   ASSERT_EQ(FieldHelper(intField).GetTypeAsPrimitiveType(), Primitive::kPrimInt);
-  ASSERT_EQ(intField->GetInt(sirt_instance.get()), 0);
+  ASSERT_EQ(intField->GetInt(h_instance.Get()), 0);
 
-  mirror::ArtField* longField = sirt_klass->FindDeclaredInstanceField("longField", "J");
+  mirror::ArtField* longField = h_klass->FindDeclaredInstanceField("longField", "J");
   ASSERT_TRUE(longField != nullptr);
   ASSERT_EQ(FieldHelper(longField).GetTypeAsPrimitiveType(), Primitive::kPrimLong);
-  ASSERT_EQ(longField->GetLong(sirt_instance.get()), static_cast<int64_t>(0));
+  ASSERT_EQ(longField->GetLong(h_instance.Get()), static_cast<int64_t>(0));
 
-  mirror::ArtField* floatField = sirt_klass->FindDeclaredInstanceField("floatField", "F");
+  mirror::ArtField* floatField = h_klass->FindDeclaredInstanceField("floatField", "F");
   ASSERT_TRUE(floatField != nullptr);
   ASSERT_EQ(FieldHelper(floatField).GetTypeAsPrimitiveType(), Primitive::kPrimFloat);
-  ASSERT_EQ(floatField->GetFloat(sirt_instance.get()), static_cast<float>(0.0f));
+  ASSERT_EQ(floatField->GetFloat(h_instance.Get()), static_cast<float>(0.0f));
 
-  mirror::ArtField* doubleField = sirt_klass->FindDeclaredInstanceField("doubleField", "D");
+  mirror::ArtField* doubleField = h_klass->FindDeclaredInstanceField("doubleField", "D");
   ASSERT_TRUE(doubleField != nullptr);
   ASSERT_EQ(FieldHelper(doubleField).GetTypeAsPrimitiveType(), Primitive::kPrimDouble);
-  ASSERT_EQ(doubleField->GetDouble(sirt_instance.get()), static_cast<double>(0.0));
+  ASSERT_EQ(doubleField->GetDouble(h_instance.Get()), static_cast<double>(0.0));
 
-  mirror::ArtField* objectField = sirt_klass->FindDeclaredInstanceField("objectField",
+  mirror::ArtField* objectField = h_klass->FindDeclaredInstanceField("objectField",
                                                                         "Ljava/lang/Object;");
   ASSERT_TRUE(objectField != nullptr);
   ASSERT_EQ(FieldHelper(objectField).GetTypeAsPrimitiveType(), Primitive::kPrimNot);
-  ASSERT_EQ(objectField->GetObject(sirt_instance.get()), nullptr);
+  ASSERT_EQ(objectField->GetObject(h_instance.Get()), nullptr);
 
   // Create a java.lang.Object instance to set objectField.
-  SirtRef<mirror::Class> object_klass(soa.Self(),
-                                      class_linker_->FindSystemClass(soa.Self(),
-                                                                     "Ljava/lang/Object;"));
-  ASSERT_TRUE(object_klass.get() != nullptr);
-  SirtRef<mirror::Object> sirt_obj(soa.Self(), sirt_klass->AllocObject(soa.Self()));
-  ASSERT_TRUE(sirt_obj.get() != nullptr);
-  ASSERT_EQ(sirt_obj->GetClass(), sirt_klass.get());
+  Handle<mirror::Class> object_klass(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;")));
+  ASSERT_TRUE(object_klass.Get() != nullptr);
+  Handle<mirror::Object> h_obj(hs.NewHandle(h_klass->AllocObject(soa.Self())));
+  ASSERT_TRUE(h_obj.Get() != nullptr);
+  ASSERT_EQ(h_obj->GetClass(), h_klass.Get());
 
   // Modify fields inside transaction and abort it.
   Transaction transaction;
   Runtime::Current()->EnterTransactionMode(&transaction);
-  booleanField->SetBoolean<true>(sirt_instance.get(), true);
-  byteField->SetByte<true>(sirt_instance.get(), 1);
-  charField->SetChar<true>(sirt_instance.get(), 1u);
-  shortField->SetShort<true>(sirt_instance.get(), 1);
-  intField->SetInt<true>(sirt_instance.get(), 1);
-  longField->SetLong<true>(sirt_instance.get(), 1);
-  floatField->SetFloat<true>(sirt_instance.get(), 1.0);
-  doubleField->SetDouble<true>(sirt_instance.get(), 1.0);
-  objectField->SetObject<true>(sirt_instance.get(), sirt_obj.get());
+  booleanField->SetBoolean<true>(h_instance.Get(), true);
+  byteField->SetByte<true>(h_instance.Get(), 1);
+  charField->SetChar<true>(h_instance.Get(), 1u);
+  shortField->SetShort<true>(h_instance.Get(), 1);
+  intField->SetInt<true>(h_instance.Get(), 1);
+  longField->SetLong<true>(h_instance.Get(), 1);
+  floatField->SetFloat<true>(h_instance.Get(), 1.0);
+  doubleField->SetDouble<true>(h_instance.Get(), 1.0);
+  objectField->SetObject<true>(h_instance.Get(), h_obj.Get());
   Runtime::Current()->ExitTransactionMode();
   transaction.Abort();
 
   // Check values have properly been restored to their original (default) value.
-  EXPECT_EQ(booleanField->GetBoolean(sirt_instance.get()), false);
-  EXPECT_EQ(byteField->GetByte(sirt_instance.get()), 0);
-  EXPECT_EQ(charField->GetChar(sirt_instance.get()), 0u);
-  EXPECT_EQ(shortField->GetShort(sirt_instance.get()), 0);
-  EXPECT_EQ(intField->GetInt(sirt_instance.get()), 0);
-  EXPECT_EQ(longField->GetLong(sirt_instance.get()), static_cast<int64_t>(0));
-  EXPECT_EQ(floatField->GetFloat(sirt_instance.get()), static_cast<float>(0.0f));
-  EXPECT_EQ(doubleField->GetDouble(sirt_instance.get()), static_cast<double>(0.0));
-  EXPECT_EQ(objectField->GetObject(sirt_instance.get()), nullptr);
+  EXPECT_EQ(booleanField->GetBoolean(h_instance.Get()), false);
+  EXPECT_EQ(byteField->GetByte(h_instance.Get()), 0);
+  EXPECT_EQ(charField->GetChar(h_instance.Get()), 0u);
+  EXPECT_EQ(shortField->GetShort(h_instance.Get()), 0);
+  EXPECT_EQ(intField->GetInt(h_instance.Get()), 0);
+  EXPECT_EQ(longField->GetLong(h_instance.Get()), static_cast<int64_t>(0));
+  EXPECT_EQ(floatField->GetFloat(h_instance.Get()), static_cast<float>(0.0f));
+  EXPECT_EQ(doubleField->GetDouble(h_instance.Get()), static_cast<double>(0.0));
+  EXPECT_EQ(objectField->GetObject(h_instance.Get()), nullptr);
 }
 
 
 TEST_F(TransactionTest, StaticArrayFieldsTest) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::ClassLoader> class_loader(
-      soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("Transaction")));
-  ASSERT_TRUE(class_loader.get() != nullptr);
+  StackHandleScope<4> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(LoadDex("Transaction"))));
+  ASSERT_TRUE(class_loader.Get() != nullptr);
 
-  SirtRef<mirror::Class> sirt_klass(soa.Self(),
-                                    class_linker_->FindClass(soa.Self(), "LStaticArrayFieldsTest;",
-                                                             class_loader));
-  ASSERT_TRUE(sirt_klass.get() != nullptr);
-  class_linker_->EnsureInitialized(sirt_klass, true, true);
-  ASSERT_TRUE(sirt_klass->IsInitialized());
+  Handle<mirror::Class> h_klass(
+      hs.NewHandle(class_linker_->FindClass(soa.Self(), "LStaticArrayFieldsTest;", class_loader)));
+  ASSERT_TRUE(h_klass.Get() != nullptr);
+  class_linker_->EnsureInitialized(h_klass, true, true);
+  ASSERT_TRUE(h_klass->IsInitialized());
 
   // Lookup fields.
-  mirror::ArtField* booleanArrayField = sirt_klass->FindDeclaredStaticField("booleanArrayField", "[Z");
+  mirror::ArtField* booleanArrayField = h_klass->FindDeclaredStaticField("booleanArrayField", "[Z");
   ASSERT_TRUE(booleanArrayField != nullptr);
-  mirror::BooleanArray* booleanArray = booleanArrayField->GetObject(sirt_klass.get())->AsBooleanArray();
+  mirror::BooleanArray* booleanArray = booleanArrayField->GetObject(h_klass.Get())->AsBooleanArray();
   ASSERT_TRUE(booleanArray != nullptr);
   ASSERT_EQ(booleanArray->GetLength(), 1);
   ASSERT_EQ(booleanArray->GetWithoutChecks(0), false);
 
-  mirror::ArtField* byteArrayField = sirt_klass->FindDeclaredStaticField("byteArrayField", "[B");
+  mirror::ArtField* byteArrayField = h_klass->FindDeclaredStaticField("byteArrayField", "[B");
   ASSERT_TRUE(byteArrayField != nullptr);
-  mirror::ByteArray* byteArray = byteArrayField->GetObject(sirt_klass.get())->AsByteArray();
+  mirror::ByteArray* byteArray = byteArrayField->GetObject(h_klass.Get())->AsByteArray();
   ASSERT_TRUE(byteArray != nullptr);
   ASSERT_EQ(byteArray->GetLength(), 1);
   ASSERT_EQ(byteArray->GetWithoutChecks(0), 0);
 
-  mirror::ArtField* charArrayField = sirt_klass->FindDeclaredStaticField("charArrayField", "[C");
+  mirror::ArtField* charArrayField = h_klass->FindDeclaredStaticField("charArrayField", "[C");
   ASSERT_TRUE(charArrayField != nullptr);
-  mirror::CharArray* charArray = charArrayField->GetObject(sirt_klass.get())->AsCharArray();
+  mirror::CharArray* charArray = charArrayField->GetObject(h_klass.Get())->AsCharArray();
   ASSERT_TRUE(charArray != nullptr);
   ASSERT_EQ(charArray->GetLength(), 1);
   ASSERT_EQ(charArray->GetWithoutChecks(0), 0u);
 
-  mirror::ArtField* shortArrayField = sirt_klass->FindDeclaredStaticField("shortArrayField", "[S");
+  mirror::ArtField* shortArrayField = h_klass->FindDeclaredStaticField("shortArrayField", "[S");
   ASSERT_TRUE(shortArrayField != nullptr);
-  mirror::ShortArray* shortArray = shortArrayField->GetObject(sirt_klass.get())->AsShortArray();
+  mirror::ShortArray* shortArray = shortArrayField->GetObject(h_klass.Get())->AsShortArray();
   ASSERT_TRUE(shortArray != nullptr);
   ASSERT_EQ(shortArray->GetLength(), 1);
   ASSERT_EQ(shortArray->GetWithoutChecks(0), 0);
 
-  mirror::ArtField* intArrayField = sirt_klass->FindDeclaredStaticField("intArrayField", "[I");
+  mirror::ArtField* intArrayField = h_klass->FindDeclaredStaticField("intArrayField", "[I");
   ASSERT_TRUE(intArrayField != nullptr);
-  mirror::IntArray* intArray = intArrayField->GetObject(sirt_klass.get())->AsIntArray();
+  mirror::IntArray* intArray = intArrayField->GetObject(h_klass.Get())->AsIntArray();
   ASSERT_TRUE(intArray != nullptr);
   ASSERT_EQ(intArray->GetLength(), 1);
   ASSERT_EQ(intArray->GetWithoutChecks(0), 0);
 
-  mirror::ArtField* longArrayField = sirt_klass->FindDeclaredStaticField("longArrayField", "[J");
+  mirror::ArtField* longArrayField = h_klass->FindDeclaredStaticField("longArrayField", "[J");
   ASSERT_TRUE(longArrayField != nullptr);
-  mirror::LongArray* longArray = longArrayField->GetObject(sirt_klass.get())->AsLongArray();
+  mirror::LongArray* longArray = longArrayField->GetObject(h_klass.Get())->AsLongArray();
   ASSERT_TRUE(longArray != nullptr);
   ASSERT_EQ(longArray->GetLength(), 1);
   ASSERT_EQ(longArray->GetWithoutChecks(0), static_cast<int64_t>(0));
 
-  mirror::ArtField* floatArrayField = sirt_klass->FindDeclaredStaticField("floatArrayField", "[F");
+  mirror::ArtField* floatArrayField = h_klass->FindDeclaredStaticField("floatArrayField", "[F");
   ASSERT_TRUE(floatArrayField != nullptr);
-  mirror::FloatArray* floatArray = floatArrayField->GetObject(sirt_klass.get())->AsFloatArray();
+  mirror::FloatArray* floatArray = floatArrayField->GetObject(h_klass.Get())->AsFloatArray();
   ASSERT_TRUE(floatArray != nullptr);
   ASSERT_EQ(floatArray->GetLength(), 1);
   ASSERT_EQ(floatArray->GetWithoutChecks(0), static_cast<float>(0.0f));
 
-  mirror::ArtField* doubleArrayField = sirt_klass->FindDeclaredStaticField("doubleArrayField", "[D");
+  mirror::ArtField* doubleArrayField = h_klass->FindDeclaredStaticField("doubleArrayField", "[D");
   ASSERT_TRUE(doubleArrayField != nullptr);
-  mirror::DoubleArray* doubleArray = doubleArrayField->GetObject(sirt_klass.get())->AsDoubleArray();
+  mirror::DoubleArray* doubleArray = doubleArrayField->GetObject(h_klass.Get())->AsDoubleArray();
   ASSERT_TRUE(doubleArray != nullptr);
   ASSERT_EQ(doubleArray->GetLength(), 1);
   ASSERT_EQ(doubleArray->GetWithoutChecks(0), static_cast<double>(0.0f));
 
-  mirror::ArtField* objectArrayField = sirt_klass->FindDeclaredStaticField("objectArrayField",
+  mirror::ArtField* objectArrayField = h_klass->FindDeclaredStaticField("objectArrayField",
                                                                            "[Ljava/lang/Object;");
   ASSERT_TRUE(objectArrayField != nullptr);
   mirror::ObjectArray<mirror::Object>* objectArray =
-      objectArrayField->GetObject(sirt_klass.get())->AsObjectArray<mirror::Object>();
+      objectArrayField->GetObject(h_klass.Get())->AsObjectArray<mirror::Object>();
   ASSERT_TRUE(objectArray != nullptr);
   ASSERT_EQ(objectArray->GetLength(), 1);
   ASSERT_EQ(objectArray->GetWithoutChecks(0), nullptr);
 
   // Create a java.lang.Object instance to set objectField.
-  SirtRef<mirror::Class> object_klass(soa.Self(),
-                                      class_linker_->FindSystemClass(soa.Self(),
-                                                                     "Ljava/lang/Object;"));
-  ASSERT_TRUE(object_klass.get() != nullptr);
-  SirtRef<mirror::Object> sirt_obj(soa.Self(), sirt_klass->AllocObject(soa.Self()));
-  ASSERT_TRUE(sirt_obj.get() != nullptr);
-  ASSERT_EQ(sirt_obj->GetClass(), sirt_klass.get());
+  Handle<mirror::Class> object_klass(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;")));
+  ASSERT_TRUE(object_klass.Get() != nullptr);
+  Handle<mirror::Object> h_obj(hs.NewHandle(h_klass->AllocObject(soa.Self())));
+  ASSERT_TRUE(h_obj.Get() != nullptr);
+  ASSERT_EQ(h_obj->GetClass(), h_klass.Get());
 
   // Modify fields inside transaction and abort it.
   Transaction transaction;
@@ -394,7 +392,7 @@
   longArray->SetWithoutChecks<true>(0, 1);
   floatArray->SetWithoutChecks<true>(0, 1.0);
   doubleArray->SetWithoutChecks<true>(0, 1.0);
-  objectArray->SetWithoutChecks<true>(0, sirt_obj.get());
+  objectArray->SetWithoutChecks<true>(0, h_obj.Get());
   Runtime::Current()->ExitTransactionMode();
   transaction.Abort();
 
@@ -412,42 +410,41 @@
 
 TEST_F(TransactionTest, EmptyClass) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::ClassLoader> class_loader(
-      soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("Transaction")));
-  ASSERT_TRUE(class_loader.get() != nullptr);
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(LoadDex("Transaction"))));
+  ASSERT_TRUE(class_loader.Get() != nullptr);
 
-  SirtRef<mirror::Class> sirt_klass(soa.Self(),
-                                    class_linker_->FindClass(soa.Self(),
-                                                             "LTransaction$EmptyStatic;",
-                                                             class_loader));
-  ASSERT_TRUE(sirt_klass.get() != nullptr);
-  class_linker_->VerifyClass(sirt_klass);
-  ASSERT_TRUE(sirt_klass->IsVerified());
+  Handle<mirror::Class> h_klass(
+      hs.NewHandle(class_linker_->FindClass(soa.Self(), "LTransaction$EmptyStatic;", class_loader)));
+  ASSERT_TRUE(h_klass.Get() != nullptr);
+  class_linker_->VerifyClass(h_klass);
+  ASSERT_TRUE(h_klass->IsVerified());
 
   Transaction transaction;
   Runtime::Current()->EnterTransactionMode(&transaction);
-  class_linker_->EnsureInitialized(sirt_klass, true, true);
+  class_linker_->EnsureInitialized(h_klass, true, true);
   Runtime::Current()->ExitTransactionMode();
   ASSERT_FALSE(soa.Self()->IsExceptionPending());
 }
 
 TEST_F(TransactionTest, StaticFieldClass) {
   ScopedObjectAccess soa(Thread::Current());
-  SirtRef<mirror::ClassLoader> class_loader(
-      soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("Transaction")));
-  ASSERT_TRUE(class_loader.get() != nullptr);
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(LoadDex("Transaction"))));
+  ASSERT_TRUE(class_loader.Get() != nullptr);
 
-  SirtRef<mirror::Class> sirt_klass(soa.Self(),
-                                    class_linker_->FindClass(soa.Self(),
-                                                             "LTransaction$StaticFieldClass;",
-                                                             class_loader));
-  ASSERT_TRUE(sirt_klass.get() != nullptr);
-  class_linker_->VerifyClass(sirt_klass);
-  ASSERT_TRUE(sirt_klass->IsVerified());
+  Handle<mirror::Class> h_klass(
+      hs.NewHandle(class_linker_->FindClass(soa.Self(), "LTransaction$StaticFieldClass;",
+                                            class_loader)));
+  ASSERT_TRUE(h_klass.Get() != nullptr);
+  class_linker_->VerifyClass(h_klass);
+  ASSERT_TRUE(h_klass->IsVerified());
 
   Transaction transaction;
   Runtime::Current()->EnterTransactionMode(&transaction);
-  class_linker_->EnsureInitialized(sirt_klass, true, true);
+  class_linker_->EnsureInitialized(h_klass, true, true);
   Runtime::Current()->ExitTransactionMode();
   ASSERT_FALSE(soa.Self()->IsExceptionPending());
 }
@@ -455,39 +452,40 @@
 TEST_F(TransactionTest, BlacklistedClass) {
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("Transaction");
-  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
-                                            soa.Decode<mirror::ClassLoader*>(jclass_loader));
-  ASSERT_TRUE(class_loader.get() != nullptr);
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
+  ASSERT_TRUE(class_loader.Get() != nullptr);
 
   // Load and verify java.lang.ExceptionInInitializerError and java.lang.InternalError which will
   // be thrown by class initialization due to native call.
-  SirtRef<mirror::Class> sirt_klass(soa.Self(),
-                                    class_linker_->FindSystemClass(soa.Self(),
-                                                                   "Ljava/lang/ExceptionInInitializerError;"));
-  ASSERT_TRUE(sirt_klass.get() != nullptr);
-  class_linker_->VerifyClass(sirt_klass);
-  ASSERT_TRUE(sirt_klass->IsVerified());
-  sirt_klass.reset(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/InternalError;"));
-  ASSERT_TRUE(sirt_klass.get() != nullptr);
-  class_linker_->VerifyClass(sirt_klass);
-  ASSERT_TRUE(sirt_klass->IsVerified());
+  Handle<mirror::Class> h_klass(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(),
+                                                  "Ljava/lang/ExceptionInInitializerError;")));
+  ASSERT_TRUE(h_klass.Get() != nullptr);
+  class_linker_->VerifyClass(h_klass);
+  ASSERT_TRUE(h_klass->IsVerified());
+  h_klass.Assign(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/InternalError;"));
+  ASSERT_TRUE(h_klass.Get() != nullptr);
+  class_linker_->VerifyClass(h_klass);
+  ASSERT_TRUE(h_klass->IsVerified());
 
   // Load and verify Transaction$NativeSupport used in class initialization.
-  sirt_klass.reset(class_linker_->FindClass(soa.Self(), "LTransaction$NativeSupport;",
-                                            class_loader));
-  ASSERT_TRUE(sirt_klass.get() != nullptr);
-  class_linker_->VerifyClass(sirt_klass);
-  ASSERT_TRUE(sirt_klass->IsVerified());
+  h_klass.Assign(class_linker_->FindClass(soa.Self(), "LTransaction$NativeSupport;",
+                                             class_loader));
+  ASSERT_TRUE(h_klass.Get() != nullptr);
+  class_linker_->VerifyClass(h_klass);
+  ASSERT_TRUE(h_klass->IsVerified());
 
-  sirt_klass.reset(class_linker_->FindClass(soa.Self(), "LTransaction$BlacklistedClass;",
-                                            class_loader));
-  ASSERT_TRUE(sirt_klass.get() != nullptr);
-  class_linker_->VerifyClass(sirt_klass);
-  ASSERT_TRUE(sirt_klass->IsVerified());
+  h_klass.Assign(class_linker_->FindClass(soa.Self(), "LTransaction$BlacklistedClass;",
+                                             class_loader));
+  ASSERT_TRUE(h_klass.Get() != nullptr);
+  class_linker_->VerifyClass(h_klass);
+  ASSERT_TRUE(h_klass->IsVerified());
 
   Transaction transaction;
   Runtime::Current()->EnterTransactionMode(&transaction);
-  class_linker_->EnsureInitialized(sirt_klass, true, true);
+  class_linker_->EnsureInitialized(h_klass, true, true);
   Runtime::Current()->ExitTransactionMode();
   ASSERT_TRUE(soa.Self()->IsExceptionPending());
 }
diff --git a/runtime/utils.cc b/runtime/utils.cc
index ee2cca4..f562252 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -22,10 +22,9 @@
 #include <sys/syscall.h>
 #include <sys/types.h>
 #include <sys/wait.h>
-
 #include <unistd.h>
+#include <memory>
 
-#include "UniquePtr.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "dex_file-inl.h"
@@ -105,7 +104,7 @@
 }
 
 bool ReadFileToString(const std::string& file_name, std::string* result) {
-  UniquePtr<File> file(new File);
+  std::unique_ptr<File> file(new File);
   if (!file->Open(file_name, O_RDONLY)) {
     return false;
   }
@@ -230,7 +229,7 @@
   if (klass == NULL) {
     return "null";
   }
-  return PrettyDescriptor(ClassHelper(klass).GetDescriptor());
+  return PrettyDescriptor(klass->GetDescriptor());
 }
 
 std::string PrettyDescriptor(const std::string& descriptor) {
@@ -412,11 +411,9 @@
   if (obj->GetClass() == NULL) {
     return "(raw)";
   }
-  ClassHelper kh(obj->GetClass());
-  std::string result(PrettyDescriptor(kh.GetDescriptor()));
+  std::string result(PrettyDescriptor(obj->GetClass()->GetDescriptor()));
   if (obj->IsClass()) {
-    kh.ChangeClass(obj->AsClass());
-    result += "<" + PrettyDescriptor(kh.GetDescriptor()) + ">";
+    result += "<" + PrettyDescriptor(obj->AsClass()->GetDescriptor()) + ">";
   }
   return result;
 }
@@ -1047,7 +1044,7 @@
   if (current_method != nullptr) {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
   }
-  UniquePtr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid));
+  std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid));
   if (!backtrace->Unwind(0)) {
     os << prefix << "(backtrace::Unwind failed for thread " << tid << ")\n";
     return;
@@ -1169,10 +1166,12 @@
 
 std::string GetDalvikCacheOrDie(const char* subdir, const bool create_if_absent) {
   CHECK(subdir != nullptr);
-  const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", GetAndroidData()));
+  const char* android_data = GetAndroidData();
+  const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", android_data));
   const std::string dalvik_cache = dalvik_cache_root + subdir;
   if (create_if_absent && !OS::DirectoryExists(dalvik_cache.c_str())) {
-    if (StartsWith(dalvik_cache_root, "/tmp/")) {
+    // Don't create the system's /data/dalvik-cache/... because it needs special permissions.
+    if (strcmp(android_data, "/data") != 0) {
       int result = mkdir(dalvik_cache_root.c_str(), 0700);
       if (result != 0 && errno != EEXIST) {
         PLOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache_root;
@@ -1204,6 +1203,37 @@
   return StringPrintf("%s/%s", cache_location, cache_file.c_str());
 }
 
+static void InsertIsaDirectory(const InstructionSet isa, std::string* filename) {
+  // in = /foo/bar/baz
+  // out = /foo/bar/<isa>/baz
+  size_t pos = filename->rfind('/');
+  CHECK_NE(pos, std::string::npos) << *filename << " " << isa;
+  filename->insert(pos, "/", 1);
+  filename->insert(pos + 1, GetInstructionSetString(isa));
+}
+
+std::string GetSystemImageFilename(const char* location, const InstructionSet isa) {
+  // location = /system/framework/boot.art
+  // filename = /system/framework/<isa>/boot.art
+  std::string filename(location);
+  InsertIsaDirectory(isa, &filename);
+  return filename;
+}
+
+std::string DexFilenameToOdexFilename(const std::string& location, const InstructionSet isa) {
+  // location = /foo/bar/baz.jar
+  // odex_location = /foo/bar/<isa>/baz.odex
+  CHECK_GE(location.size(), 4U) << location;  // must be at least .123
+  std::string odex_location(location);
+  InsertIsaDirectory(isa, &odex_location);
+  size_t dot_index = odex_location.size() - 3 - 1;  // 3=dex or zip or apk
+  CHECK_EQ('.', odex_location[dot_index]) << location;
+  odex_location.resize(dot_index + 1);
+  CHECK_EQ('.', odex_location[odex_location.size()-1]) << location << " " << odex_location;
+  odex_location += "odex";
+  return odex_location;
+}
+
 bool IsZipMagic(uint32_t magic) {
   return (('P' == ((magic >> 0) & 0xff)) &&
           ('K' == ((magic >> 8) & 0xff)));
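
The two helpers added above both go through InsertIsaDirectory, which splices the instruction-set name in front of the last path component; DexFilenameToOdexFilename additionally swaps the extension for .odex. A simplified standalone restatement of that string manipulation (the helper name is made up, and it uses rfind('.') where the patch uses fixed-offset arithmetic), handy for checking the expected mappings:

    #include <cassert>
    #include <string>

    // Standalone sketch of the path rewriting done by InsertIsaDirectory /
    // DexFilenameToOdexFilename; "arm" stands in for GetInstructionSetString(isa).
    std::string ToOdexForIsa(std::string location, const std::string& isa) {
      size_t slash = location.rfind('/');     // /foo/bar/baz.jar -> index of last '/'
      location.insert(slash + 1, isa + "/");  // /foo/bar/arm/baz.jar
      size_t dot = location.rfind('.');       // drop the .jar/.apk/.zip extension
      location.resize(dot + 1);               // /foo/bar/arm/baz.
      return location + "odex";               // /foo/bar/arm/baz.odex
    }

    int main() {
      assert(ToOdexForIsa("/foo/bar/baz.jar", "arm") == "/foo/bar/arm/baz.odex");
      assert(ToOdexForIsa("/system/framework/boot.jar", "x86")
             == "/system/framework/x86/boot.odex");
      return 0;
    }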
diff --git a/runtime/utils.h b/runtime/utils.h
index 14a532e..4a9236a 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -25,6 +25,7 @@
 #include "base/logging.h"
 #include "base/stringprintf.h"
 #include "globals.h"
+#include "instruction_set.h"
 #include "primitive.h"
 
 namespace art {
@@ -402,6 +403,13 @@
 std::string GetDalvikCacheFilenameOrDie(const char* file_location,
                                         const char* cache_location);
 
+// Returns the system location for an image
+std::string GetSystemImageFilename(const char* location, InstructionSet isa);
+
+// Returns an .odex file name adjacent to the dex location.
+// For example, for "/foo/bar/baz.jar", return "/foo/bar/<isa>/baz.odex".
+std::string DexFilenameToOdexFilename(const std::string& location, InstructionSet isa);
+
 // Check whether the given magic matches a known file type.
 bool IsZipMagic(uint32_t magic);
 bool IsDexMagic(uint32_t magic);
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index d425620..4a1e477 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -23,7 +23,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change.h"
-#include "sirt_ref.h"
+#include "handle_scope-inl.h"
 
 #include <valgrind.h>
 
@@ -95,11 +95,12 @@
   ScopedObjectAccess soa(Thread::Current());
   EXPECT_EQ("null", PrettyTypeOf(NULL));
 
-  SirtRef<mirror::String> s(soa.Self(), mirror::String::AllocFromModifiedUtf8(soa.Self(), ""));
-  EXPECT_EQ("java.lang.String", PrettyTypeOf(s.get()));
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::String> s(hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "")));
+  EXPECT_EQ("java.lang.String", PrettyTypeOf(s.Get()));
 
-  SirtRef<mirror::ShortArray> a(soa.Self(), mirror::ShortArray::Alloc(soa.Self(), 2));
-  EXPECT_EQ("short[]", PrettyTypeOf(a.get()));
+  Handle<mirror::ShortArray> a(hs.NewHandle(mirror::ShortArray::Alloc(soa.Self(), 2)));
+  EXPECT_EQ("short[]", PrettyTypeOf(a.Get()));
 
   mirror::Class* c = class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/String;");
   ASSERT_TRUE(c != NULL);
@@ -350,6 +351,16 @@
                GetDalvikCacheFilenameOrDie("/system/framework/boot.art", "/foo").c_str());
 }
 
+TEST_F(UtilsTest, GetSystemImageFilename) {
+  EXPECT_STREQ("/system/framework/arm/boot.art",
+               GetSystemImageFilename("/system/framework/boot.art", kArm).c_str());
+}
+
+TEST_F(UtilsTest, DexFilenameToOdexFilename) {
+  EXPECT_STREQ("/foo/bar/arm/baz.odex",
+               DexFilenameToOdexFilename("/foo/bar/baz.jar", kArm).c_str());
+}
+
 TEST_F(UtilsTest, ExecSuccess) {
   std::vector<std::string> command;
   if (kIsTargetBuild) {
diff --git a/runtime/verifier/method_verifier-inl.h b/runtime/verifier/method_verifier-inl.h
index c554394..62ecf4b 100644
--- a/runtime/verifier/method_verifier-inl.h
+++ b/runtime/verifier/method_verifier-inl.h
@@ -21,7 +21,7 @@
 #include "method_verifier.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
-#include "sirt_ref-inl.h"
+#include "handle_scope-inl.h"
 
 namespace art {
 namespace verifier {
@@ -39,11 +39,11 @@
 }
 
 inline mirror::ClassLoader* MethodVerifier::GetClassLoader() {
-  return class_loader_->get();
+  return class_loader_->Get();
 }
 
 inline mirror::DexCache* MethodVerifier::GetDexCache() {
-  return dex_cache_->get();
+  return dex_cache_->Get();
 }
 
 inline MethodReference MethodVerifier::GetMethodReference() const {
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 91170f0..b5c07aa 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -40,7 +40,7 @@
 #include "register_line-inl.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
-#include "sirt_ref-inl.h"
+#include "handle_scope-inl.h"
 #include "verifier/dex_gc_map.h"
 
 namespace art {
@@ -93,11 +93,10 @@
   }
   bool early_failure = false;
   std::string failure_message;
-  ClassHelper kh(klass);
-  const DexFile& dex_file = kh.GetDexFile();
-  const DexFile::ClassDef* class_def = kh.GetClassDef();
+  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile::ClassDef* class_def = klass->GetClassDef();
   mirror::Class* super = klass->GetSuperClass();
-  if (super == NULL && strcmp("Ljava/lang/Object;", kh.GetDescriptor()) != 0) {
+  if (super == NULL && "Ljava/lang/Object;" != klass->GetDescriptor()) {
     early_failure = true;
     failure_message = " that has no super class";
   } else if (super != NULL && super->IsFinal()) {
@@ -115,15 +114,15 @@
     }
     return kHardFailure;
   }
-  Thread* self = Thread::Current();
-  SirtRef<mirror::DexCache> dex_cache(self, kh.GetDexCache());
-  SirtRef<mirror::ClassLoader> class_loader(self, klass->GetClassLoader());
+  StackHandleScope<2> hs(Thread::Current());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
   return VerifyClass(&dex_file, dex_cache, class_loader, class_def, allow_soft_failures, error);
 }
 
 MethodVerifier::FailureKind MethodVerifier::VerifyClass(const DexFile* dex_file,
-                                                        SirtRef<mirror::DexCache>& dex_cache,
-                                                        SirtRef<mirror::ClassLoader>& class_loader,
+                                                        Handle<mirror::DexCache> dex_cache,
+                                                        Handle<mirror::ClassLoader> class_loader,
                                                         const DexFile::ClassDef* class_def,
                                                         bool allow_soft_failures,
                                                         std::string* error) {
@@ -152,7 +151,8 @@
     previous_direct_method_idx = method_idx;
     InvokeType type = it.GetMethodInvokeType(*class_def);
     mirror::ArtMethod* method =
-        linker->ResolveMethod(*dex_file, method_idx, dex_cache, class_loader, NULL, type);
+        linker->ResolveMethod(*dex_file, method_idx, dex_cache, class_loader,
+                              NullHandle<mirror::ArtMethod>(), type);
     if (method == NULL) {
       DCHECK(Thread::Current()->IsExceptionPending());
       // We couldn't resolve the method, but continue regardless.
@@ -166,7 +166,8 @@
                                                       it.GetMethodCodeItem(),
                                                       method,
                                                       it.GetMemberAccessFlags(),
-                                                      allow_soft_failures);
+                                                      allow_soft_failures,
+                                                      false);
     if (result != kNoFailure) {
       if (result == kHardFailure) {
         hard_fail = true;
@@ -194,7 +195,8 @@
     previous_virtual_method_idx = method_idx;
     InvokeType type = it.GetMethodInvokeType(*class_def);
     mirror::ArtMethod* method =
-        linker->ResolveMethod(*dex_file, method_idx, dex_cache, class_loader, NULL, type);
+        linker->ResolveMethod(*dex_file, method_idx, dex_cache, class_loader,
+                              NullHandle<mirror::ArtMethod>(), type);
     if (method == NULL) {
       DCHECK(Thread::Current()->IsExceptionPending());
       // We couldn't resolve the method, but continue regardless.
@@ -208,7 +210,8 @@
                                                       it.GetMethodCodeItem(),
                                                       method,
                                                       it.GetMemberAccessFlags(),
-                                                      allow_soft_failures);
+                                                      allow_soft_failures,
+                                                      false);
     if (result != kNoFailure) {
       if (result == kHardFailure) {
         hard_fail = true;
@@ -233,38 +236,40 @@
 
 MethodVerifier::FailureKind MethodVerifier::VerifyMethod(uint32_t method_idx,
                                                          const DexFile* dex_file,
-                                                         SirtRef<mirror::DexCache>& dex_cache,
-                                                         SirtRef<mirror::ClassLoader>& class_loader,
+                                                         Handle<mirror::DexCache> dex_cache,
+                                                         Handle<mirror::ClassLoader> class_loader,
                                                          const DexFile::ClassDef* class_def,
                                                          const DexFile::CodeItem* code_item,
                                                          mirror::ArtMethod* method,
                                                          uint32_t method_access_flags,
-                                                         bool allow_soft_failures) {
+                                                         bool allow_soft_failures,
+                                                         bool need_precise_constants) {
   MethodVerifier::FailureKind result = kNoFailure;
   uint64_t start_ns = NanoTime();
 
-  MethodVerifier verifier_(dex_file, &dex_cache, &class_loader, class_def, code_item,
-                           method_idx, method, method_access_flags, true, allow_soft_failures);
-  if (verifier_.Verify()) {
+  MethodVerifier verifier(dex_file, &dex_cache, &class_loader, class_def, code_item,
+                           method_idx, method, method_access_flags, true, allow_soft_failures,
+                           need_precise_constants);
+  if (verifier.Verify()) {
     // Verification completed, however failures may be pending that didn't cause the verification
     // to hard fail.
-    CHECK(!verifier_.have_pending_hard_failure_);
-    if (verifier_.failures_.size() != 0) {
+    CHECK(!verifier.have_pending_hard_failure_);
+    if (verifier.failures_.size() != 0) {
       if (VLOG_IS_ON(verifier)) {
-          verifier_.DumpFailures(VLOG_STREAM(verifier) << "Soft verification failures in "
+          verifier.DumpFailures(VLOG_STREAM(verifier) << "Soft verification failures in "
                                 << PrettyMethod(method_idx, *dex_file) << "\n");
       }
       result = kSoftFailure;
     }
   } else {
     // Bad method data.
-    CHECK_NE(verifier_.failures_.size(), 0U);
-    CHECK(verifier_.have_pending_hard_failure_);
-    verifier_.DumpFailures(LOG(INFO) << "Verification error in "
+    CHECK_NE(verifier.failures_.size(), 0U);
+    CHECK(verifier.have_pending_hard_failure_);
+    verifier.DumpFailures(LOG(INFO) << "Verification error in "
                                     << PrettyMethod(method_idx, *dex_file) << "\n");
     if (gDebugVerify) {
-      std::cout << "\n" << verifier_.info_messages_.str();
-      verifier_.Dump(std::cout);
+      std::cout << "\n" << verifier.info_messages_.str();
+      verifier.Dump(std::cout);
     }
     result = kHardFailure;
   }
@@ -278,26 +283,27 @@
 
 void MethodVerifier::VerifyMethodAndDump(std::ostream& os, uint32_t dex_method_idx,
                                          const DexFile* dex_file,
-                                         SirtRef<mirror::DexCache>& dex_cache,
-                                         SirtRef<mirror::ClassLoader>& class_loader,
+                                         Handle<mirror::DexCache> dex_cache,
+                                         Handle<mirror::ClassLoader> class_loader,
                                          const DexFile::ClassDef* class_def,
                                          const DexFile::CodeItem* code_item,
                                          mirror::ArtMethod* method,
                                          uint32_t method_access_flags) {
   MethodVerifier verifier(dex_file, &dex_cache, &class_loader, class_def, code_item,
-                          dex_method_idx, method, method_access_flags, true, true);
+                          dex_method_idx, method, method_access_flags, true, true, true);
   verifier.Verify();
   verifier.DumpFailures(os);
   os << verifier.info_messages_.str();
   verifier.Dump(os);
 }
 
-MethodVerifier::MethodVerifier(const DexFile* dex_file, SirtRef<mirror::DexCache>* dex_cache,
-                               SirtRef<mirror::ClassLoader>* class_loader,
+MethodVerifier::MethodVerifier(const DexFile* dex_file, Handle<mirror::DexCache>* dex_cache,
+                               Handle<mirror::ClassLoader>* class_loader,
                                const DexFile::ClassDef* class_def,
                                const DexFile::CodeItem* code_item, uint32_t dex_method_idx,
                                mirror::ArtMethod* method, uint32_t method_access_flags,
-                               bool can_load_classes, bool allow_soft_failures)
+                               bool can_load_classes, bool allow_soft_failures,
+                               bool need_precise_constants)
     : reg_types_(can_load_classes),
       work_insn_idx_(-1),
       dex_method_idx_(dex_method_idx),
@@ -318,6 +324,7 @@
       monitor_enter_count_(0),
       can_load_classes_(can_load_classes),
       allow_soft_failures_(allow_soft_failures),
+      need_precise_constants_(need_precise_constants),
       has_check_casts_(false),
       has_virtual_or_interface_invokes_(false) {
   Runtime::Current()->AddMethodVerifier(this);
@@ -330,16 +337,16 @@
 }
 
 void MethodVerifier::FindLocksAtDexPc(mirror::ArtMethod* m, uint32_t dex_pc,
-                                      std::vector<uint32_t>& monitor_enter_dex_pcs) {
+                                      std::vector<uint32_t>* monitor_enter_dex_pcs) {
   MethodHelper mh(m);
-  Thread* self = Thread::Current();
-  SirtRef<mirror::DexCache> dex_cache(self, mh.GetDexCache());
-  SirtRef<mirror::ClassLoader> class_loader(self, mh.GetClassLoader());
+  StackHandleScope<2> hs(Thread::Current());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(mh.GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(mh.GetClassLoader()));
   MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader, &mh.GetClassDef(),
                           mh.GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), false,
-                          true);
+                          true, false);
   verifier.interesting_dex_pc_ = dex_pc;
-  verifier.monitor_enter_dex_pcs_ = &monitor_enter_dex_pcs;
+  verifier.monitor_enter_dex_pcs_ = monitor_enter_dex_pcs;
   verifier.FindLocksAtDexPc();
 }
 
@@ -355,14 +362,14 @@
 }
 
 mirror::ArtField* MethodVerifier::FindAccessedFieldAtDexPc(mirror::ArtMethod* m,
-                                                        uint32_t dex_pc) {
+                                                           uint32_t dex_pc) {
   MethodHelper mh(m);
-  Thread* self = Thread::Current();
-  SirtRef<mirror::DexCache> dex_cache(self, mh.GetDexCache());
-  SirtRef<mirror::ClassLoader> class_loader(self, mh.GetClassLoader());
+  StackHandleScope<2> hs(Thread::Current());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(mh.GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(mh.GetClassLoader()));
   MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader, &mh.GetClassDef(),
                           mh.GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), true,
-                          true);
+                          true, false);
   return verifier.FindAccessedFieldAtDexPc(dex_pc);
 }
 
@@ -388,12 +395,12 @@
 mirror::ArtMethod* MethodVerifier::FindInvokedMethodAtDexPc(mirror::ArtMethod* m,
                                                             uint32_t dex_pc) {
   MethodHelper mh(m);
-  Thread* self = Thread::Current();
-  SirtRef<mirror::DexCache> dex_cache(self, mh.GetDexCache());
-  SirtRef<mirror::ClassLoader> class_loader(self, mh.GetClassLoader());
+  StackHandleScope<2> hs(Thread::Current());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(mh.GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(mh.GetClassLoader()));
   MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader, &mh.GetClassDef(),
                           mh.GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), true,
-                          true);
+                          true, false);
   return verifier.FindInvokedMethodAtDexPc(dex_pc);
 }
 
@@ -649,54 +656,53 @@
 }
 
 bool MethodVerifier::VerifyInstruction(const Instruction* inst, uint32_t code_offset) {
-  DecodedInstruction dec_insn(inst);
   bool result = true;
   switch (inst->GetVerifyTypeArgumentA()) {
     case Instruction::kVerifyRegA:
-      result = result && CheckRegisterIndex(dec_insn.vA);
+      result = result && CheckRegisterIndex(inst->VRegA());
       break;
     case Instruction::kVerifyRegAWide:
-      result = result && CheckWideRegisterIndex(dec_insn.vA);
+      result = result && CheckWideRegisterIndex(inst->VRegA());
       break;
   }
   switch (inst->GetVerifyTypeArgumentB()) {
     case Instruction::kVerifyRegB:
-      result = result && CheckRegisterIndex(dec_insn.vB);
+      result = result && CheckRegisterIndex(inst->VRegB());
       break;
     case Instruction::kVerifyRegBField:
-      result = result && CheckFieldIndex(dec_insn.vB);
+      result = result && CheckFieldIndex(inst->VRegB());
       break;
     case Instruction::kVerifyRegBMethod:
-      result = result && CheckMethodIndex(dec_insn.vB);
+      result = result && CheckMethodIndex(inst->VRegB());
       break;
     case Instruction::kVerifyRegBNewInstance:
-      result = result && CheckNewInstance(dec_insn.vB);
+      result = result && CheckNewInstance(inst->VRegB());
       break;
     case Instruction::kVerifyRegBString:
-      result = result && CheckStringIndex(dec_insn.vB);
+      result = result && CheckStringIndex(inst->VRegB());
       break;
     case Instruction::kVerifyRegBType:
-      result = result && CheckTypeIndex(dec_insn.vB);
+      result = result && CheckTypeIndex(inst->VRegB());
       break;
     case Instruction::kVerifyRegBWide:
-      result = result && CheckWideRegisterIndex(dec_insn.vB);
+      result = result && CheckWideRegisterIndex(inst->VRegB());
       break;
   }
   switch (inst->GetVerifyTypeArgumentC()) {
     case Instruction::kVerifyRegC:
-      result = result && CheckRegisterIndex(dec_insn.vC);
+      result = result && CheckRegisterIndex(inst->VRegC());
       break;
     case Instruction::kVerifyRegCField:
-      result = result && CheckFieldIndex(dec_insn.vC);
+      result = result && CheckFieldIndex(inst->VRegC());
       break;
     case Instruction::kVerifyRegCNewArray:
-      result = result && CheckNewArray(dec_insn.vC);
+      result = result && CheckNewArray(inst->VRegC());
       break;
     case Instruction::kVerifyRegCType:
-      result = result && CheckTypeIndex(dec_insn.vC);
+      result = result && CheckTypeIndex(inst->VRegC());
       break;
     case Instruction::kVerifyRegCWide:
-      result = result && CheckWideRegisterIndex(dec_insn.vC);
+      result = result && CheckWideRegisterIndex(inst->VRegC());
       break;
   }
   switch (inst->GetVerifyExtraFlags()) {
@@ -709,11 +715,14 @@
     case Instruction::kVerifySwitchTargets:
       result = result && CheckSwitchTargets(code_offset);
       break;
-    case Instruction::kVerifyVarArg:
-      result = result && CheckVarArgRegs(dec_insn.vA, dec_insn.arg);
+    case Instruction::kVerifyVarArg: {
+      uint32_t args[Instruction::kMaxVarArgRegs];
+      inst->GetVarArgs(args);
+      result = result && CheckVarArgRegs(inst->VRegA(), args);
       break;
+    }
     case Instruction::kVerifyVarArgRange:
-      result = result && CheckVarArgRangeRegs(dec_insn.vA, dec_insn.vC);
+      result = result && CheckVarArgRangeRegs(inst->VRegA(), inst->VRegC());
       break;
     case Instruction::kVerifyError:
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "unexpected opcode " << inst->Name();
@@ -1009,7 +1018,7 @@
 }
 
 bool MethodVerifier::CheckVarArgRegs(uint32_t vA, uint32_t arg[]) {
-  if (vA > 5) {
+  if (vA > Instruction::kMaxVarArgRegs) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid arg count (" << vA << ") in non-range invoke)";
     return false;
   }
@@ -1432,11 +1441,8 @@
   // We need to ensure the work line is consistent while performing validation. When we spot a
   // peephole pattern we compute a new line for either the fallthrough instruction or the
   // branch target.
-  UniquePtr<RegisterLine> branch_line;
-  UniquePtr<RegisterLine> fallthrough_line;
-
-  // We need precise constant types only for deoptimization which happens at runtime.
-  const bool need_precise_constant = !Runtime::Current()->IsCompiler();
+  std::unique_ptr<RegisterLine> branch_line;
+  std::unique_ptr<RegisterLine> fallthrough_line;
 
   switch (inst->Opcode()) {
     case Instruction::NOP:
@@ -1589,25 +1595,25 @@
     case Instruction::CONST_4: {
       int32_t val = static_cast<int32_t>(inst->VRegB_11n() << 28) >> 28;
       work_line_->SetRegisterType(inst->VRegA_11n(),
-                                  DetermineCat1Constant(val, need_precise_constant));
+                                  DetermineCat1Constant(val, need_precise_constants_));
       break;
     }
     case Instruction::CONST_16: {
       int16_t val = static_cast<int16_t>(inst->VRegB_21s());
       work_line_->SetRegisterType(inst->VRegA_21s(),
-                                  DetermineCat1Constant(val, need_precise_constant));
+                                  DetermineCat1Constant(val, need_precise_constants_));
       break;
     }
     case Instruction::CONST: {
       int32_t val = inst->VRegB_31i();
       work_line_->SetRegisterType(inst->VRegA_31i(),
-                                  DetermineCat1Constant(val, need_precise_constant));
+                                  DetermineCat1Constant(val, need_precise_constants_));
       break;
     }
     case Instruction::CONST_HIGH16: {
       int32_t val = static_cast<int32_t>(inst->VRegB_21h() << 16);
       work_line_->SetRegisterType(inst->VRegA_21h(),
-                                  DetermineCat1Constant(val, need_precise_constant));
+                                  DetermineCat1Constant(val, need_precise_constants_));
       break;
     }
       /* could be long or double; resolved upon use */
@@ -1832,7 +1838,7 @@
                                             << array_type;
         } else {
           const RegType& component_type = reg_types_.GetComponentType(array_type,
-                                                                      class_loader_->get());
+                                                                      class_loader_->Get());
           DCHECK(!component_type.IsConflict());
           if (component_type.IsNonZeroReferenceTypes()) {
             Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid fill-array-data with component type "
@@ -2147,7 +2153,7 @@
         const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
         uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
         const char* descriptor = dex_file_->StringByTypeIdx(return_type_idx);
-        return_type = &reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
+        return_type = &reg_types_.FromDescriptor(class_loader_->Get(), descriptor, false);
       }
       if (!return_type->IsLowHalf()) {
         work_line_->SetResultRegisterType(*return_type);
@@ -2214,7 +2220,7 @@
          */
         work_line_->MarkRefsAsInitialized(this_type);
       }
-      const RegType& return_type = reg_types_.FromDescriptor(class_loader_->get(),
+      const RegType& return_type = reg_types_.FromDescriptor(class_loader_->Get(),
                                                              return_type_descriptor, false);
       if (!return_type.IsLowHalf()) {
         work_line_->SetResultRegisterType(return_type);
@@ -2240,7 +2246,7 @@
         } else {
           descriptor = MethodHelper(called_method).GetReturnTypeDescriptor();
         }
-        const RegType& return_type =  reg_types_.FromDescriptor(class_loader_->get(), descriptor,
+        const RegType& return_type =  reg_types_.FromDescriptor(class_loader_->Get(), descriptor,
                                                                 false);
         if (!return_type.IsLowHalf()) {
           work_line_->SetResultRegisterType(return_type);
@@ -2298,7 +2304,7 @@
       } else {
         descriptor = MethodHelper(abs_method).GetReturnTypeDescriptor();
       }
-      const RegType& return_type = reg_types_.FromDescriptor(class_loader_->get(), descriptor,
+      const RegType& return_type = reg_types_.FromDescriptor(class_loader_->Get(), descriptor,
                                                              false);
       if (!return_type.IsLowHalf()) {
         work_line_->SetResultRegisterType(return_type);
@@ -2564,7 +2570,7 @@
       mirror::ArtMethod* called_method = VerifyInvokeVirtualQuickArgs(inst, is_range);
       if (called_method != NULL) {
         const char* descriptor = MethodHelper(called_method).GetReturnTypeDescriptor();
-        const RegType& return_type = reg_types_.FromDescriptor(class_loader_->get(), descriptor,
+        const RegType& return_type = reg_types_.FromDescriptor(class_loader_->Get(), descriptor,
                                                                false);
         if (!return_type.IsLowHalf()) {
           work_line_->SetResultRegisterType(return_type);
@@ -2835,7 +2841,7 @@
   const RegType& result =
       klass != NULL ? reg_types_.FromClass(descriptor, klass,
                                            klass->CannotBeAssignedFromOtherTypes())
-                    : reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
+                    : reg_types_.FromDescriptor(class_loader_->Get(), descriptor, false);
   if (result.IsConflict()) {
     Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "accessing broken descriptor '" << descriptor
         << "' in " << referrer;
@@ -3055,7 +3061,7 @@
     if (method_type != METHOD_INTERFACE && !actual_arg_type.IsZero()) {
       mirror::Class* klass = res_method->GetDeclaringClass();
       const RegType& res_method_class =
-          reg_types_.FromClass(ClassHelper(klass).GetDescriptor(), klass,
+          reg_types_.FromClass(klass->GetDescriptor().c_str(), klass,
                                klass->CannotBeAssignedFromOtherTypes());
       if (!res_method_class.IsAssignableFrom(actual_arg_type)) {
         Fail(actual_arg_type.IsUnresolvedTypes() ? VERIFY_ERROR_NO_CLASS:
@@ -3075,7 +3081,7 @@
   size_t params_size = params == NULL ? 0 : params->Size();
   uint32_t arg[5];
   if (!is_range) {
-    inst->GetArgs(arg);
+    inst->GetVarArgs(arg);
   }
   for (size_t param_index = 0; param_index < params_size; param_index++) {
     if (actual_args >= expected_args) {
@@ -3091,7 +3097,7 @@
           << " missing signature component";
       return NULL;
     }
-    const RegType& reg_type = reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
+    const RegType& reg_type = reg_types_.FromDescriptor(class_loader_->Get(), descriptor, false);
     uint32_t get_reg = is_range ? inst->VRegC_3rc() + actual_args : arg[actual_args];
     if (reg_type.IsIntegralTypes()) {
       const RegType& src_type = work_line_->GetRegisterType(get_reg);
@@ -3123,10 +3129,19 @@
     VLOG(verifier) << "Failed to get mirror::Class* from '" << actual_arg_type << "'";
     return nullptr;
   }
-  mirror::ObjectArray<mirror::ArtMethod>* vtable = actual_arg_type.GetClass()->GetVTable();
-  CHECK(vtable != nullptr);
+  mirror::ObjectArray<mirror::ArtMethod>* vtable = nullptr;
+  mirror::Class* klass = actual_arg_type.GetClass();
+  if (klass->IsInterface()) {
+    // Derive Object.class from Class.class.getSuperclass().
+    mirror::Class* object_klass = klass->GetClass()->GetSuperClass();
+    CHECK(object_klass->IsObjectClass());
+    vtable = object_klass->GetVTable();
+  } else {
+    vtable = klass->GetVTable();
+  }
+  CHECK(vtable != nullptr) << PrettyDescriptor(klass);
   uint16_t vtable_index = is_range ? inst->VRegB_3rc() : inst->VRegB_35c();
-  CHECK_LT(static_cast<int32_t>(vtable_index), vtable->GetLength());
+  CHECK_LT(static_cast<int32_t>(vtable_index), vtable->GetLength()) << PrettyDescriptor(klass);
   mirror::ArtMethod* res_method = vtable->Get(vtable_index);
   CHECK(!Thread::Current()->IsExceptionPending());
   return res_method;
@@ -3171,7 +3186,7 @@
   if (!actual_arg_type.IsZero()) {
     mirror::Class* klass = res_method->GetDeclaringClass();
     const RegType& res_method_class =
-        reg_types_.FromClass(ClassHelper(klass).GetDescriptor(), klass,
+        reg_types_.FromClass(klass->GetDescriptor().c_str(), klass,
                              klass->CannotBeAssignedFromOtherTypes());
     if (!res_method_class.IsAssignableFrom(actual_arg_type)) {
       Fail(actual_arg_type.IsUnresolvedTypes() ? VERIFY_ERROR_NO_CLASS :
@@ -3189,7 +3204,7 @@
   size_t params_size = params == NULL ? 0 : params->Size();
   uint32_t arg[5];
   if (!is_range) {
-    inst->GetArgs(arg);
+    inst->GetVarArgs(arg);
   }
   size_t actual_args = 1;
   for (size_t param_index = 0; param_index < params_size; param_index++) {
@@ -3207,7 +3222,7 @@
                                         << " missing signature component";
       return NULL;
     }
-    const RegType& reg_type = reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
+    const RegType& reg_type = reg_types_.FromDescriptor(class_loader_->Get(), descriptor, false);
     uint32_t get_reg = is_range ? inst->VRegC_3rc() + actual_args : arg[actual_args];
     if (!work_line_->VerifyRegisterType(get_reg, reg_type)) {
       return res_method;
@@ -3251,11 +3266,11 @@
     } else {
       // Verify each register. If "arg_count" is bad, VerifyRegisterType() will run off the end of
       // the list and fail. It's legal, if silly, for arg_count to be zero.
-      const RegType& expected_type = reg_types_.GetComponentType(res_type, class_loader_->get());
+      const RegType& expected_type = reg_types_.GetComponentType(res_type, class_loader_->Get());
       uint32_t arg_count = (is_range) ? inst->VRegA_3rc() : inst->VRegA_35c();
       uint32_t arg[5];
       if (!is_range) {
-        inst->GetArgs(arg);
+        inst->GetVarArgs(arg);
       }
       for (size_t ui = 0; ui < arg_count; ui++) {
         uint32_t get_reg = is_range ? inst->VRegC_3rc() + ui : arg[ui];
@@ -3293,7 +3308,7 @@
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "not array type " << array_type << " with aget";
     } else {
       /* verify the class */
-      const RegType& component_type = reg_types_.GetComponentType(array_type, class_loader_->get());
+      const RegType& component_type = reg_types_.GetComponentType(array_type, class_loader_->Get());
       if (!component_type.IsReferenceTypes() && !is_primitive) {
         Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "primitive array type " << array_type
             << " source for aget-object";
@@ -3370,7 +3385,7 @@
     } else if (!array_type.IsArrayTypes()) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "not array type " << array_type << " with aput";
     } else {
-      const RegType& component_type = reg_types_.GetComponentType(array_type, class_loader_->get());
+      const RegType& component_type = reg_types_.GetComponentType(array_type, class_loader_->Get());
       const uint32_t vregA = inst->VRegA_23x();
       if (is_primitive) {
         VerifyPrimitivePut(component_type, insn_type, vregA);
@@ -3512,7 +3527,7 @@
   if (field_type == nullptr) {
     const DexFile::FieldId& field_id = dex_file_->GetFieldId(field_idx);
     const char* descriptor = dex_file_->GetFieldTypeDescriptor(field_id);
-    field_type = &reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
+    field_type = &reg_types_.FromDescriptor(class_loader_->Get(), descriptor, false);
   }
   DCHECK(field_type != nullptr);
   const uint32_t vregA = (is_static) ? inst->VRegA_21c() : inst->VRegA_22c();
@@ -3536,7 +3551,7 @@
       Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "expected field " << PrettyField(field)
                                         << " to be compatible with type '" << insn_type
                                         << "' but found type '" << *field_type
-                                        << "' in get-object";
+                                        << "' in Get-object";
       work_line_->SetRegisterType(vregA, reg_types_.Conflict());
       return;
     }
@@ -3579,7 +3594,7 @@
   if (field_type == nullptr) {
     const DexFile::FieldId& field_id = dex_file_->GetFieldId(field_idx);
     const char* descriptor = dex_file_->GetFieldTypeDescriptor(field_id);
-    field_type = &reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
+    field_type = &reg_types_.FromDescriptor(class_loader_->Get(), descriptor, false);
   }
   DCHECK(field_type != nullptr);
   const uint32_t vregA = (is_static) ? inst->VRegA_21c() : inst->VRegA_22c();
@@ -3655,7 +3670,7 @@
       // compile time
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "expected field " << PrettyField(field)
                                         << " to be of type '" << insn_type
-                                        << "' but found type '" << *field_type << "' in get";
+                                        << "' but found type '" << *field_type << "' in Get";
       return;
     }
   } else {
@@ -3784,7 +3799,7 @@
       }
     }
   } else {
-    UniquePtr<RegisterLine> copy(gDebugVerify ?
+    std::unique_ptr<RegisterLine> copy(gDebugVerify ?
                                  RegisterLine::Create(target_line->NumRegs(), this) :
                                  NULL);
     if (gDebugVerify) {
@@ -3831,7 +3846,7 @@
       const DexFile::ProtoId& proto_id = dex_file_->GetMethodPrototype(method_id);
       uint16_t return_type_idx = proto_id.return_type_idx_;
       const char* descriptor = dex_file_->GetTypeDescriptor(dex_file_->GetTypeId(return_type_idx));
-      return_type_ = &reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
+      return_type_ = &reg_types_.FromDescriptor(class_loader_->Get(), descriptor, false);
     }
   }
   return *return_type_;
@@ -3847,7 +3862,7 @@
       declaring_class_ = &reg_types_.FromClass(descriptor, klass,
                                                klass->CannotBeAssignedFromOtherTypes());
     } else {
-      declaring_class_ = &reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
+      declaring_class_ = &reg_types_.FromDescriptor(class_loader_->Get(), descriptor, false);
     }
   }
   return *declaring_class_;
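
Beyond the SirtRef-to-Handle mechanics, the one behavioral change in this file is in the quickened virtual-invoke hunk above (@@ -3123,10 +3129,19 @@): a receiver register whose inferred type is an interface no longer trips the vtable lookup. A helper-shaped restatement of that branch (the helper name is invented; the calls are the ones used in the hunk):

    // Interfaces carry no vtable of their own, so a quickened virtual invoke on an
    // interface-typed register can only reach methods inherited from java.lang.Object;
    // Object.class is recovered as the superclass of Class.class.
    static mirror::ObjectArray<mirror::ArtMethod>* VTableForReceiver(mirror::Class* klass)
        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
      if (klass->IsInterface()) {
        mirror::Class* object_klass = klass->GetClass()->GetSuperClass();
        CHECK(object_klass->IsObjectClass());
        return object_klass->GetVTable();
      }
      return klass->GetVTable();
    }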
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 5f13191..a23e80d 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_VERIFIER_METHOD_VERIFIER_H_
 #define ART_RUNTIME_VERIFIER_METHOD_VERIFIER_H_
 
+#include <memory>
 #include <set>
 #include <vector>
 
@@ -33,12 +34,11 @@
 #include "reg_type_cache-inl.h"
 #include "register_line.h"
 #include "safe_map.h"
-#include "UniquePtr.h"
 
 namespace art {
 
 struct ReferenceMap2Visitor;
-template<class T> class SirtRef;
+template<class T> class Handle;
 
 namespace verifier {
 
@@ -126,7 +126,7 @@
   }
 
  private:
-  UniquePtr<RegisterLine*[]> register_lines_;
+  std::unique_ptr<RegisterLine*[]> register_lines_;
   size_t size_;
 };
 
@@ -142,15 +142,15 @@
   /* Verify a class. Returns "kNoFailure" on success. */
   static FailureKind VerifyClass(mirror::Class* klass, bool allow_soft_failures, std::string* error)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static FailureKind VerifyClass(const DexFile* dex_file, SirtRef<mirror::DexCache>& dex_cache,
-                                 SirtRef<mirror::ClassLoader>& class_loader,
+  static FailureKind VerifyClass(const DexFile* dex_file, Handle<mirror::DexCache> dex_cache,
+                                 Handle<mirror::ClassLoader> class_loader,
                                  const DexFile::ClassDef* class_def,
                                  bool allow_soft_failures, std::string* error)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void VerifyMethodAndDump(std::ostream& os, uint32_t method_idx, const DexFile* dex_file,
-                                  SirtRef<mirror::DexCache>& dex_cache,
-                                  SirtRef<mirror::ClassLoader>& class_loader,
+                                  Handle<mirror::DexCache> dex_cache,
+                                  Handle<mirror::ClassLoader> class_loader,
                                   const DexFile::ClassDef* class_def,
                                   const DexFile::CodeItem* code_item,
                                   mirror::ArtMethod* method, uint32_t method_access_flags)
@@ -185,7 +185,7 @@
   // Fills 'monitor_enter_dex_pcs' with the dex pcs of the monitor-enter instructions corresponding
   // to the locks held at 'dex_pc' in method 'm'.
   static void FindLocksAtDexPc(mirror::ArtMethod* m, uint32_t dex_pc,
-                               std::vector<uint32_t>& monitor_enter_dex_pcs)
+                               std::vector<uint32_t>* monitor_enter_dex_pcs)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns the accessed field corresponding to the quick instruction's field
@@ -205,10 +205,11 @@
     return can_load_classes_;
   }
 
-  MethodVerifier(const DexFile* dex_file, SirtRef<mirror::DexCache>* dex_cache,
-                 SirtRef<mirror::ClassLoader>* class_loader, const DexFile::ClassDef* class_def,
+  MethodVerifier(const DexFile* dex_file, Handle<mirror::DexCache>* dex_cache,
+                 Handle<mirror::ClassLoader>* class_loader, const DexFile::ClassDef* class_def,
                  const DexFile::CodeItem* code_item, uint32_t method_idx, mirror::ArtMethod* method,
-                 uint32_t access_flags, bool can_load_classes, bool allow_soft_failures)
+                 uint32_t access_flags, bool can_load_classes, bool allow_soft_failures,
+                 bool need_precise_constants)
           SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ~MethodVerifier();
@@ -255,12 +256,12 @@
    *      for code flow problems.
    */
   static FailureKind VerifyMethod(uint32_t method_idx, const DexFile* dex_file,
-                                  SirtRef<mirror::DexCache>& dex_cache,
-                                  SirtRef<mirror::ClassLoader>& class_loader,
+                                  Handle<mirror::DexCache> dex_cache,
+                                  Handle<mirror::ClassLoader> class_loader,
                                   const DexFile::ClassDef* class_def_idx,
                                   const DexFile::CodeItem* code_item,
                                   mirror::ArtMethod* method, uint32_t method_access_flags,
-                                  bool allow_soft_failures)
+                                  bool allow_soft_failures, bool need_precise_constants)
           SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void FindLocksAtDexPc() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -347,7 +348,7 @@
   /* Ensure that the wide register index is valid for this code item. */
   bool CheckWideRegisterIndex(uint32_t idx);
 
-  // Perform static checks on a field get or set instruction. All we do here is ensure that the
+  // Perform static checks on a field Get or set instruction. All we do here is ensure that the
   // field index is in the valid range.
   bool CheckFieldIndex(uint32_t idx);
 
@@ -617,14 +618,14 @@
   PcToRegisterLineTable reg_table_;
 
   // Storage for the register status we're currently working on.
-  UniquePtr<RegisterLine> work_line_;
+  std::unique_ptr<RegisterLine> work_line_;
 
   // The address of the instruction we're currently working on, note that this is in 2 byte
   // quantities
   uint32_t work_insn_idx_;
 
   // Storage for the register status we're saving for later.
-  UniquePtr<RegisterLine> saved_line_;
+  std::unique_ptr<RegisterLine> saved_line_;
 
   const uint32_t dex_method_idx_;  // The method we're working on.
   // Its object representation if known.
@@ -633,14 +634,14 @@
   const RegType* return_type_;  // Lazily computed return type of the method.
   const DexFile* const dex_file_;  // The dex file containing the method.
   // The dex_cache for the declaring class of the method.
-  SirtRef<mirror::DexCache>* dex_cache_ GUARDED_BY(Locks::mutator_lock_);
+  Handle<mirror::DexCache>* dex_cache_ GUARDED_BY(Locks::mutator_lock_);
   // The class loader for the declaring class of the method.
-  SirtRef<mirror::ClassLoader>* class_loader_ GUARDED_BY(Locks::mutator_lock_);
+  Handle<mirror::ClassLoader>* class_loader_ GUARDED_BY(Locks::mutator_lock_);
   const DexFile::ClassDef* const class_def_;  // The class def of the declaring class of the method.
   const DexFile::CodeItem* const code_item_;  // The code item containing the code for the method.
   const RegType* declaring_class_;  // Lazily computed reg type of the method's declaring class.
   // Instruction widths and flags, one entry per code unit.
-  UniquePtr<InstructionFlags[]> insn_flags_;
+  std::unique_ptr<InstructionFlags[]> insn_flags_;
   // The dex PC of a FindLocksAtDexPc request, -1 otherwise.
   uint32_t interesting_dex_pc_;
   // The container into which FindLocksAtDexPc should write the registers containing held locks,
@@ -672,6 +673,12 @@
   // running and the verifier is called from the class linker.
   const bool allow_soft_failures_;
 
+  // An optimization: instead of generating unique RegTypes for constants, we use imprecise
+  // constants that cover a range of values. This is not good enough for deoptimization, which
+  // avoids loading from registers in the case of a constant, because the dex instruction set
+  // lost track of whether a value belongs in a floating-point or general-purpose register file.
+  const bool need_precise_constants_;
+
   // Indicates the method being verified contains at least one check-cast or aput-object
   // instruction. Aput-object operations implicitly check for array-store exceptions, similar to
   // check-cast.
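
The constructor now ends in three booleans, and the call sites in this patch pass bare literals (for example `true, true, true` in VerifyMethodAndDump). The same call with the trailing arguments labeled, purely to make the new need_precise_constants parameter readable (comment labels only, no behavioral difference):

    MethodVerifier verifier(dex_file, &dex_cache, &class_loader, class_def, code_item,
                            dex_method_idx, method, method_access_flags,
                            /* can_load_classes */ true,
                            /* allow_soft_failures */ true,
                            /* need_precise_constants */ true);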
diff --git a/runtime/verifier/method_verifier_test.cc b/runtime/verifier/method_verifier_test.cc
index 9dca7f5..9ac04d7 100644
--- a/runtime/verifier/method_verifier_test.cc
+++ b/runtime/verifier/method_verifier_test.cc
@@ -17,8 +17,8 @@
 #include "method_verifier.h"
 
 #include <stdio.h>
+#include <memory>
 
-#include "UniquePtr.h"
 #include "class_linker.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index c6f3e5c..8df1e5d 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -621,7 +621,7 @@
     if (super_klass != NULL) {
       // A super class of a precise type isn't precise as a precise type indicates the register
       // holds exactly that type.
-      return cache->FromClass(ClassHelper(super_klass).GetDescriptor(), super_klass, false);
+      return cache->FromClass(super_klass->GetDescriptor().c_str(), super_klass, false);
     } else {
       return cache->Zero();
     }
@@ -899,7 +899,7 @@
       } else if (c2 == join_class && !incoming_type.IsPreciseReference()) {
         return incoming_type;
       } else {
-        return reg_types->FromClass(ClassHelper(join_class).GetDescriptor(), join_class, false);
+        return reg_types->FromClass(join_class->GetDescriptor().c_str(), join_class, false);
       }
     }
   } else {
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index 111e867..ff9edbb 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -142,15 +142,16 @@
   // Try resolving class
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   Thread* self = Thread::Current();
-  SirtRef<mirror::ClassLoader> class_loader(self, loader);
+  StackHandleScope<1> hs(self);
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(loader));
   mirror::Class* klass = NULL;
   if (can_load_classes_) {
     klass = class_linker->FindClass(self, descriptor, class_loader);
   } else {
     klass = class_linker->LookupClass(descriptor, loader);
-    if (klass != NULL && !klass->IsLoaded()) {
+    if (klass != nullptr && !klass->IsLoaded()) {
       // We found the class but without it being loaded it's not safe for use.
-      klass = NULL;
+      klass = nullptr;
     }
   }
   return klass;
@@ -566,7 +567,7 @@
     return FromDescriptor(loader, component.c_str(), false);
   } else {
     mirror::Class* klass = array.GetClass()->GetComponentType();
-    return FromClass(ClassHelper(klass).GetDescriptor(), klass,
+    return FromClass(klass->GetDescriptor().c_str(), klass,
                      klass->CannotBeAssignedFromOtherTypes());
   }
 }
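
The class-resolution hunk above keeps its existing policy while switching to a handle: with class loading allowed it resolves through FindClass, otherwise it falls back to LookupClass and discards classes that were found but never finished loading. Restated compactly (a sketch of the branch above, not a new API):

    mirror::Class* klass = nullptr;
    if (can_load_classes_) {
      klass = class_linker->FindClass(self, descriptor, class_loader);  // may load and resolve
    } else {
      klass = class_linker->LookupClass(descriptor, loader);            // never triggers loading
      if (klass != nullptr && !klass->IsLoaded()) {
        klass = nullptr;  // found but not yet loaded, so not safe to use during verification
      }
    }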
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index 31b0113..a3e3e3b 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -94,7 +94,7 @@
     verifier_->Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invoke lacks 'this'";
     return verifier_->GetRegTypeCache()->Conflict();
   }
-  /* get the element type of the array held in vsrc */
+  /* Get the element type of the array held in vsrc */
   const uint32_t this_reg = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
   const RegType& this_type = GetRegisterType(this_reg);
   if (!this_type.IsReferenceTypes()) {
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index 8b2dadb..dade203 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -17,12 +17,12 @@
 #ifndef ART_RUNTIME_VERIFIER_REGISTER_LINE_H_
 #define ART_RUNTIME_VERIFIER_REGISTER_LINE_H_
 
+#include <memory>
 #include <vector>
 
 #include "dex_instruction.h"
 #include "reg_type.h"
 #include "safe_map.h"
-#include "UniquePtr.h"
 
 namespace art {
 namespace verifier {
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 546eb40..fdc6e3f 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -40,6 +40,7 @@
 jclass WellKnownClasses::java_lang_reflect_Proxy;
 jclass WellKnownClasses::java_lang_RuntimeException;
 jclass WellKnownClasses::java_lang_StackOverflowError;
+jclass WellKnownClasses::java_lang_String;
 jclass WellKnownClasses::java_lang_System;
 jclass WellKnownClasses::java_lang_Thread;
 jclass WellKnownClasses::java_lang_Thread$UncaughtExceptionHandler;
@@ -142,6 +143,7 @@
   java_lang_reflect_Proxy = CacheClass(env, "java/lang/reflect/Proxy");
   java_lang_RuntimeException = CacheClass(env, "java/lang/RuntimeException");
   java_lang_StackOverflowError = CacheClass(env, "java/lang/StackOverflowError");
+  java_lang_String = CacheClass(env, "java/lang/String");
   java_lang_System = CacheClass(env, "java/lang/System");
   java_lang_Thread = CacheClass(env, "java/lang/Thread");
   java_lang_Thread$UncaughtExceptionHandler = CacheClass(env, "java/lang/Thread$UncaughtExceptionHandler");
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index bc928d0..f6c2930 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -54,6 +54,7 @@
   static jclass java_lang_reflect_Proxy;
   static jclass java_lang_RuntimeException;
   static jclass java_lang_StackOverflowError;
+  static jclass java_lang_String;
   static jclass java_lang_System;
   static jclass java_lang_Thread;
   static jclass java_lang_ThreadGroup;
diff --git a/runtime/zip_archive.cc b/runtime/zip_archive.cc
index ddac7d4..841c01a 100644
--- a/runtime/zip_archive.cc
+++ b/runtime/zip_archive.cc
@@ -16,17 +16,15 @@
 
 #include "zip_archive.h"
 
-#include <vector>
-
 #include <fcntl.h>
 #include <stdio.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
+#include <vector>
 
 #include "base/stringprintf.h"
 #include "base/unix_file/fd_file.h"
-#include "UniquePtr.h"
 
 namespace art {
 
@@ -56,7 +54,7 @@
   std::string name(entry_filename);
   name += " extracted in memory from ";
   name += entry_filename;
-  UniquePtr<MemMap> map(MemMap::MapAnonymous(name.c_str(),
+  std::unique_ptr<MemMap> map(MemMap::MapAnonymous(name.c_str(),
                                              NULL, GetUncompressedLength(),
                                              PROT_READ | PROT_WRITE, false, error_msg));
   if (map.get() == nullptr) {
@@ -123,7 +121,7 @@
   DCHECK(name != nullptr);
 
   // Resist the urge to delete the space. <: is a bigraph sequence.
-  UniquePtr< ::ZipEntry> zip_entry(new ::ZipEntry);
+  std::unique_ptr< ::ZipEntry> zip_entry(new ::ZipEntry);
   const int32_t error = FindEntry(handle_, name, zip_entry.get());
   if (error) {
     *error_msg = std::string(ErrorCodeString(error));
diff --git a/runtime/zip_archive.h b/runtime/zip_archive.h
index 3ef0e6b..c0e2f2f 100644
--- a/runtime/zip_archive.h
+++ b/runtime/zip_archive.h
@@ -18,8 +18,9 @@
 #define ART_RUNTIME_ZIP_ARCHIVE_H_
 
 #include <stdint.h>
-#include <string>
 #include <ziparchive/zip_archive.h>
+#include <memory>
+#include <string>
 
 #include "base/logging.h"
 #include "base/unix_file/random_access_file.h"
@@ -27,7 +28,6 @@
 #include "mem_map.h"
 #include "os.h"
 #include "safe_map.h"
-#include "UniquePtr.h"
 
 namespace art {
 
diff --git a/runtime/zip_archive_test.cc b/runtime/zip_archive_test.cc
index c43fee5..d303d1e 100644
--- a/runtime/zip_archive_test.cc
+++ b/runtime/zip_archive_test.cc
@@ -20,8 +20,8 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <zlib.h>
+#include <memory>
 
-#include "UniquePtr.h"
 #include "common_runtime_test.h"
 #include "os.h"
 
@@ -31,16 +31,16 @@
 
 TEST_F(ZipArchiveTest, FindAndExtract) {
   std::string error_msg;
-  UniquePtr<ZipArchive> zip_archive(ZipArchive::Open(GetLibCoreDexFileName().c_str(), &error_msg));
+  std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(GetLibCoreDexFileName().c_str(), &error_msg));
   ASSERT_TRUE(zip_archive.get() != nullptr) << error_msg;
   ASSERT_TRUE(error_msg.empty());
-  UniquePtr<ZipEntry> zip_entry(zip_archive->Find("classes.dex", &error_msg));
+  std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find("classes.dex", &error_msg));
   ASSERT_TRUE(zip_entry.get() != nullptr);
   ASSERT_TRUE(error_msg.empty());
 
   ScratchFile tmp;
   ASSERT_NE(-1, tmp.GetFd());
-  UniquePtr<File> file(new File(tmp.GetFd(), tmp.GetFilename()));
+  std::unique_ptr<File> file(new File(tmp.GetFd(), tmp.GetFilename()));
   ASSERT_TRUE(file.get() != NULL);
   bool success = zip_entry->ExtractToFile(*file, &error_msg);
   ASSERT_TRUE(success) << error_msg;
diff --git a/test/079-phantom/src/Bitmap.java b/test/079-phantom/src/Bitmap.java
index 9d03cbd..85eb3cc 100644
--- a/test/079-phantom/src/Bitmap.java
+++ b/test/079-phantom/src/Bitmap.java
@@ -29,6 +29,7 @@
             new ReferenceQueue<PhantomWrapper>();
     private static BitmapWatcher sWatcher = new BitmapWatcher(sPhantomQueue);
     static {
+        sWatcher.setDaemon(true);
         sWatcher.start();
     };
 
diff --git a/build/Android.libcxx.mk b/test/111-unresolvable-exception/build
similarity index 64%
rename from build/Android.libcxx.mk
rename to test/111-unresolvable-exception/build
index 3dd1eb7..c21a9ef 100644
--- a/build/Android.libcxx.mk
+++ b/test/111-unresolvable-exception/build
@@ -1,3 +1,4 @@
+#!/bin/bash
 #
 # Copyright (C) 2014 The Android Open Source Project
 #
@@ -5,16 +6,20 @@
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#      http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
-ifneq ($(LOCAL_IS_HOST_MODULE),true)
-  include external/stlport/libstlport.mk
-  # include external/libcxx/libcxx.mk
-endif
+# Stop if something fails.
+set -e
+
+mkdir classes
+${JAVAC} -d classes `find src -name '*.java'`
+rm classes/TestException.class
+
+${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex classes
+zip $TEST_NAME.jar classes.dex
diff --git a/test/111-unresolvable-exception/expected.txt b/test/111-unresolvable-exception/expected.txt
new file mode 100644
index 0000000..052dd74
--- /dev/null
+++ b/test/111-unresolvable-exception/expected.txt
@@ -0,0 +1 @@
+Caught class java.lang.NoClassDefFoundError
diff --git a/test/111-unresolvable-exception/info.txt b/test/111-unresolvable-exception/info.txt
new file mode 100644
index 0000000..5ba3733
--- /dev/null
+++ b/test/111-unresolvable-exception/info.txt
@@ -0,0 +1,2 @@
+Test that we do not segfault when we check a catch handler
+for an unresolvable exception.
diff --git a/test/111-unresolvable-exception/src/Main.java b/test/111-unresolvable-exception/src/Main.java
new file mode 100644
index 0000000..ba07ee1
--- /dev/null
+++ b/test/111-unresolvable-exception/src/Main.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+    static public void main(String[] args) throws Exception {
+      try {
+        check(false);
+      } catch (Throwable t) {          // Should catch the NoClassDefFoundError
+        System.out.println("Caught " + t.getClass());
+      }
+    }
+
+    private static void check(boolean b) {
+      try {
+        if (b) {                   // Need this to not be dead code, but also not be invoked.
+          throwsTestException();   // TestException is checked, so we need something potentially
+                                   // throwing it.
+        }
+        throw new RuntimeException();  // Trigger exception handling.
+      } catch (TestException e) {      // This handler will have an unresolvable class.
+      } catch (Exception e) {          // General-purpose handler
+        System.out.println("Should not get here!");
+      }
+    }
+
+    // This avoids having to construct one explicitly, which won't work.
+    private static native void throwsTestException() throws TestException;
+}
diff --git a/compiler/dex/bit_vector_block_iterator.cc b/test/111-unresolvable-exception/src/TestException.java
similarity index 68%
rename from compiler/dex/bit_vector_block_iterator.cc
rename to test/111-unresolvable-exception/src/TestException.java
index 32d7d71..2d8b234 100644
--- a/compiler/dex/bit_vector_block_iterator.cc
+++ b/test/111-unresolvable-exception/src/TestException.java
@@ -14,19 +14,5 @@
  * limitations under the License.
  */
 
-#include "bit_vector_block_iterator.h"
-#include "mir_graph.h"
-
-namespace art {
-
-BasicBlock* BitVectorBlockIterator::Next() {
-  int idx = internal_iterator_.Next();
-
-  if (idx == -1) {
-    return nullptr;
-  }
-
-  return mir_graph_->GetBasicBlock(idx);
+public class TestException extends Exception {
 }
-
-}  // namespace art
diff --git a/test/Android.mk b/test/Android.mk
index 08a925c..8caa033 100644
--- a/test/Android.mk
+++ b/test/Android.mk
@@ -73,22 +73,14 @@
     LOCAL_JAVA_LIBRARIES := $(TARGET_CORE_JARS)
     LOCAL_NO_STANDARD_LIBRARIES := true
     LOCAL_MODULE_PATH := $(3)
-    LOCAL_DEX_PREOPT_IMAGE := $(TARGET_CORE_IMG_OUT)
+    LOCAL_DEX_PREOPT_IMAGE_LOCATION := $(TARGET_CORE_IMG_OUT)
     LOCAL_DEX_PREOPT := false
     LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
     LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
     include $(BUILD_JAVA_LIBRARY)
-    
+
     ART_TEST_TARGET_DEX_FILES += $$(LOCAL_INSTALLED_MODULE)
     ART_TEST_TARGET_DEX_FILES$(ART_PHONY_TEST_TARGET_SUFFIX) += $$(LOCAL_INSTALLED_MODULE)
-
-    ifdef TARGET_2ND_ARCH
-	    ART_TEST_TARGET_DEX_FILES$(2ND_ART_PHONY_TEST_TARGET_SUFFIX) += $(4)/$(1)-$(2).jar
-
-      # TODO: make this a simple copy
-$(4)/$(1)-$(2).jar: $(3)/$(1)-$(2).jar $(4)
-	cp $$< $(4)/
-    endif    
   endif
 
   ifeq ($(ART_BUILD_HOST),true)
@@ -97,7 +89,7 @@
     LOCAL_SRC_FILES := $(call all-java-files-under, $(2))
     LOCAL_JAVA_LIBRARIES := $(HOST_CORE_JARS)
     LOCAL_NO_STANDARD_LIBRARIES := true
-    LOCAL_DEX_PREOPT_IMAGE := $(HOST_CORE_IMG_OUT)
+    LOCAL_DEX_PREOPT_IMAGE := $(HOST_CORE_IMG_LOCATION)
     LOCAL_DEX_PREOPT := false
     LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
     LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
@@ -105,20 +97,12 @@
     ART_TEST_HOST_DEX_FILES += $$(LOCAL_INSTALLED_MODULE)
   endif
 endef
-$(foreach dir,$(TEST_DEX_DIRECTORIES), $(eval $(call build-art-test-dex,art-test-dex,$(dir),$(ART_NATIVETEST_OUT),$(2ND_ART_NATIVETEST_OUT))))
-$(foreach dir,$(TEST_OAT_DIRECTORIES), $(eval $(call build-art-test-dex,oat-test-dex,$(dir),$(ART_TEST_OUT),$(2ND_ART_TEST_OUT))))
+$(foreach dir,$(TEST_DEX_DIRECTORIES), $(eval $(call build-art-test-dex,art-test-dex,$(dir),$(ART_NATIVETEST_OUT))))
+$(foreach dir,$(TEST_OAT_DIRECTORIES), $(eval $(call build-art-test-dex,oat-test-dex,$(dir),$(ART_TEST_OUT))))
 
-# Rules to explicitly create 2nd-arch test directories, as we use a "cp" for them
-# instead of BUILD_JAVA_LIBRARY
-ifneq ($(2ND_ART_NATIVETEST_OUT),)
-$(2ND_ART_NATIVETEST_OUT):
-	$(hide) mkdir -p $@
-endif
-
-ifneq ($(2ND_ART_TEST_OUT),)
-$(2ND_ART_TEST_OUT):
-	$(hide) mkdir -p $@
-endif
+# Used outside the art project to get a list of the current tests
+ART_TEST_DEX_MAKE_TARGETS := $(addprefix art-test-dex-, $(TEST_DEX_DIRECTORIES))
+ART_TEST_OAT_MAKE_TARGETS := $(addprefix oat-test-dex-, $(TEST_OAT_DIRECTORIES))
 
 ########################################################################
 
@@ -129,12 +113,12 @@
 
 define declare-test-art-oat-targets-impl
 .PHONY: test-art-target-oat-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX)
-test-art-target-oat-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX): $($(2)ART_TEST_OUT)/oat-test-dex-$(1).jar test-art-target-sync
-	adb shell touch $($(2)ART_TEST_DIR)/test-art-target-oat-$(1)
-	adb shell rm $($(2)ART_TEST_DIR)/test-art-target-oat-$(1)
-	adb shell sh -c "/system/bin/dalvikvm$($(2)ART_TARGET_BINARY_SUFFIX) $(DALVIKVM_FLAGS) -XXlib:libartd.so -Ximage:$($(2)ART_TEST_DIR)/core.art -classpath $($(2)ART_TEST_DIR)/oat-test-dex-$(1).jar -Djava.library.path=$($(2)ART_TEST_DIR) $(1) && touch $($(2)ART_TEST_DIR)/test-art-target-oat-$(1)"
-	$(hide) (adb pull $($(2)ART_TEST_DIR)/test-art-target-oat-$(1) /tmp/ && echo test-art-target-oat-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX) PASSED) || (echo test-art-target-oat-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX) FAILED && exit 1)
-	$(hide) rm /tmp/test-art-target-oat-$(1)
+test-art-target-oat-$(1)$($(2)ART_PHONY_TEST_TARGET_SUFFIX): $(ART_TEST_OUT)/oat-test-dex-$(1).jar test-art-target-sync
+	adb shell touch $(ART_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@
+	adb shell rm $(ART_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@
+	adb shell sh -c "/system/bin/dalvikvm$($(2)ART_PHONY_TEST_TARGET_SUFFIX) $(DALVIKVM_FLAGS) -XXlib:libartd.so -Ximage:$(ART_TEST_DIR)/core.art -classpath $(ART_TEST_DIR)/oat-test-dex-$(1).jar -Djava.library.path=$(ART_TEST_DIR)/$(TARGET_$(2)ARCH) $(1) && touch $(ART_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@"
+	$(hide) (adb pull $(ART_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@ /tmp/ && echo $$@ PASSED) || (echo $$@ FAILED && exit 1)
+	$(hide) rm /tmp/$$@
 endef
 
 # $(1): directory
@@ -150,26 +134,26 @@
   endif
   $(call declare-test-art-oat-targets-impl,$(1),)
 
-$(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).odex: $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar $(HOST_CORE_IMG_OUT) | $(DEX2OATD)
-	$(DEX2OATD) $(DEX2OAT_FLAGS) --runtime-arg -Xms16m --runtime-arg -Xmx16m --boot-image=$(HOST_CORE_IMG_OUT) --dex-file=$(PWD)/$$< --oat-file=$(PWD)/$$@ --instruction-set=$(ART_HOST_ARCH) --host --android-root=$(HOST_OUT)
+$(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/oat-test-dex-$(1).odex: $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar $(HOST_CORE_IMG_OUT) | $(DEX2OATD)
+	$(DEX2OATD) $(DEX2OAT_FLAGS) --runtime-arg -Xms16m --runtime-arg -Xmx16m --boot-image=$(HOST_CORE_IMG_LOCATION) --dex-file=$$(realpath $$<) --oat-file=$$@ --instruction-set=$(ART_HOST_ARCH) --host --android-root=$(HOST_OUT)
 
 .PHONY: test-art-host-oat-default-$(1)
-test-art-host-oat-default-$(1): $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).odex test-art-host-dependencies
+test-art-host-oat-default-$(1): $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/oat-test-dex-$(1).odex test-art-host-dependencies
 	mkdir -p /tmp/android-data/test-art-host-oat-default-$(1)
 	ANDROID_DATA=/tmp/android-data/test-art-host-oat-default-$(1) \
 	  ANDROID_ROOT=$(HOST_OUT) \
 	  LD_LIBRARY_PATH=$(HOST_OUT_SHARED_LIBRARIES) \
-	  $(HOST_OUT_EXECUTABLES)/dalvikvm $(DALVIKVM_FLAGS) -XXlib:libartd.so -Ximage:$(shell pwd)/$(HOST_CORE_IMG_OUT) -classpath $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar -Djava.library.path=$(HOST_OUT_SHARED_LIBRARIES) $(1) $(2) \
+	  $(HOST_OUT_EXECUTABLES)/dalvikvm $(DALVIKVM_FLAGS) -XXlib:libartd.so -Ximage:$(HOST_CORE_IMG_LOCATION) -classpath $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar -Djava.library.path=$(HOST_OUT_SHARED_LIBRARIES) $(1) $(2) \
           && echo test-art-host-oat-default-$(1) PASSED || (echo test-art-host-oat-default-$(1) FAILED && exit 1)
 	$(hide) rm -r /tmp/android-data/test-art-host-oat-default-$(1)
 
 .PHONY: test-art-host-oat-interpreter-$(1)
-test-art-host-oat-interpreter-$(1): $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).odex test-art-host-dependencies
+test-art-host-oat-interpreter-$(1): $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/oat-test-dex-$(1).odex test-art-host-dependencies
 	mkdir -p /tmp/android-data/test-art-host-oat-interpreter-$(1)
 	ANDROID_DATA=/tmp/android-data/test-art-host-oat-interpreter-$(1) \
 	  ANDROID_ROOT=$(HOST_OUT) \
 	  LD_LIBRARY_PATH=$(HOST_OUT_SHARED_LIBRARIES) \
-	  $(HOST_OUT_EXECUTABLES)/dalvikvm -XXlib:libartd.so -Ximage:$(shell pwd)/$(HOST_CORE_IMG_OUT) -Xint -classpath $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar -Djava.library.path=$(HOST_OUT_SHARED_LIBRARIES) $(1) $(2) \
+	  $(HOST_OUT_EXECUTABLES)/dalvikvm -XXlib:libartd.so -Ximage:$(HOST_CORE_IMG_LOCATION) $(DALVIKVM_FLAGS) -Xint -classpath $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar -Djava.library.path=$(HOST_OUT_SHARED_LIBRARIES) $(1) $(2) \
           && echo test-art-host-oat-interpreter-$(1) PASSED || (echo test-art-host-oat-interpreter-$(1) FAILED && exit 1)
 	$(hide) rm -r /tmp/android-data/test-art-host-oat-interpreter-$(1)
 
@@ -191,12 +175,13 @@
 ########################################################################
 
 TEST_ART_RUN_TEST_MAKE_TARGETS :=
+art_run_tests_dir := $(call intermediates-dir-for,PACKAGING,art-run-tests)/DATA
 
 # Helper to create individual build targets for tests.
 # Must be called with $(eval)
 # $(1): the test number
 define declare-make-art-run-test
-dmart_target := $(TARGET_OUT_DATA)/art-run-tests/$(1)/touch
+dmart_target := $(art_run_tests_dir)/art-run-tests/$(1)/touch
 $$(dmart_target): $(DX) $(HOST_OUT_EXECUTABLES)/jasmin
 	$(hide) rm -rf $$(dir $$@) && mkdir -p $$(dir $$@)
 	$(hide) DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) $(LOCAL_PATH)/run-test --build-only --output-path $$(abspath $$(dir $$@)) $(1)
@@ -222,6 +207,9 @@
 LOCAL_MODULE_TAGS := tests
 LOCAL_MODULE := art-run-tests
 LOCAL_ADDITIONAL_DEPENDENCIES := $(TEST_ART_RUN_TEST_MAKE_TARGETS)
+# The build system uses this flag to pick up files generated by declare-make-art-run-test.
+LOCAL_PICKUP_FILES := $(art_run_tests_dir)
+
 include $(BUILD_PHONY_PACKAGE)
 
 # clear temp vars
diff --git a/test/ReferenceMap/stack_walk_refmap_jni.cc b/test/ReferenceMap/stack_walk_refmap_jni.cc
index 180db4c..48a6c61 100644
--- a/test/ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/ReferenceMap/stack_walk_refmap_jni.cc
@@ -15,8 +15,8 @@
  */
 
 #include <stdio.h>
+#include <memory>
 
-#include "UniquePtr.h"
 #include "class_linker.h"
 #include "dex_file-inl.h"
 #include "gc_map.h"
diff --git a/test/StackWalk/stack_walk_jni.cc b/test/StackWalk/stack_walk_jni.cc
index 528586e..3cf2d0b 100644
--- a/test/StackWalk/stack_walk_jni.cc
+++ b/test/StackWalk/stack_walk_jni.cc
@@ -15,8 +15,8 @@
  */
 
 #include <stdio.h>
+#include <memory>
 
-#include "UniquePtr.h"
 #include "class_linker.h"
 #include "gc_map.h"
 #include "mirror/art_method.h"
diff --git a/test/etc/host-run-test-jar b/test/etc/host-run-test-jar
index a844e82..d95559f 100755
--- a/test/etc/host-run-test-jar
+++ b/test/etc/host-run-test-jar
@@ -17,7 +17,7 @@
 INVOKE_WITH=""
 DEV_MODE="n"
 QUIET="n"
-COMPILER_OPTIONS=""
+FLAGS=""
 
 while true; do
     if [ "x$1" = "x--quiet" ]; then
@@ -69,7 +69,12 @@
     elif [ "x$1" = "x-Xcompiler-option" ]; then
         shift
         option="$1"
-        COMPILER_OPTIONS="${COMPILER_OPTIONS} -Xcompiler-option $option"
+        FLAGS="${FLAGS} -Xcompiler-option $option"
+        shift
+    elif [ "x$1" = "x--runtime-option" ]; then
+        shift
+        option="$1"
+        FLAGS="${FLAGS} $option"
         shift
     elif [ "x$1" = "x--" ]; then
         shift
@@ -127,4 +132,4 @@
 fi
 
 cd $ANDROID_BUILD_TOP
-$INVOKE_WITH $gdb $exe $gdbargs -XXlib:$LIB $JNI_OPTS $COMPILER_OPTIONS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main "$@"
+$INVOKE_WITH $gdb $exe $gdbargs -XXlib:$LIB $JNI_OPTS $FLAGS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main "$@"
diff --git a/test/etc/push-and-run-test-jar b/test/etc/push-and-run-test-jar
index e0d2f1d..b090c33 100755
--- a/test/etc/push-and-run-test-jar
+++ b/test/etc/push-and-run-test-jar
@@ -38,6 +38,11 @@
         option="$1"
         FLAGS="${FLAGS} -Xcompiler-option $option"
         shift
+    elif [ "x$1" = "x--runtime-option" ]; then
+        shift
+        option="$1"
+        FLAGS="${FLAGS} $option"
+        shift
     elif [ "x$1" = "x--boot" ]; then
         shift
         BOOT_OPT="$1"
@@ -150,7 +155,7 @@
 
 JNI_OPTS="-Xjnigreflimit:512 -Xcheck:jni"
 
-cmdline="cd $DEX_LOCATION && mkdir -p dalvik-cache/{arm,arm64,mips,x86,x86_64} && export ANDROID_DATA=$DEX_LOCATION && export DEX_LOCATION=$DEX_LOCATION && \
+cmdline="cd $DEX_LOCATION && export ANDROID_DATA=$DEX_LOCATION && export DEX_LOCATION=$DEX_LOCATION && \
     $INVOKE_WITH $gdb /system/bin/dalvikvm$TARGET_SUFFIX $FLAGS $gdbargs -XXlib:$LIB $ZYGOTE $JNI_OPTS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main"
 if [ "$DEV_MODE" = "y" ]; then
   echo $cmdline "$@"
diff --git a/test/run-all-tests b/test/run-all-tests
index a6675df..885ee44 100755
--- a/test/run-all-tests
+++ b/test/run-all-tests
@@ -35,6 +35,7 @@
 
 run_args=""
 usage="no"
+sequential="no"
 
 while true; do
     if [ "x$1" = "x--host" ]; then
@@ -70,6 +71,15 @@
     elif [ "x$1" = "x--help" ]; then
         usage="yes"
         shift
+    elif [ "x$1" = "x--seq" ]; then
+        sequential="yes"
+        shift
+    elif [ "x$1" = "x-O" ]; then
+        run_args="${run_args} -O"
+        shift
+    elif [ "x$1" = "x--64" ]; then
+        run_args="${run_args} --64"
+        shift
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         usage="yes"
@@ -88,35 +98,56 @@
         echo "  Options are all passed to run-test; refer to that for " \
              "further documentation:"
         echo "    --debug --dev --host --interpreter --jvm --no-optimize"
-        echo "    --no-verify -O --update --valgrind --zygote"
+        echo "    --no-verify -O --update --valgrind --zygote --64"
+        echo "  Specific Runtime Options:"
+        echo "    --seq                Run tests one-by-one, avoiding failures caused by busy CPU"
     ) 1>&2
     exit 1
 fi
 
-# start all the tests
-i=0
-for test_name in *; do
-  if [ -d "$test_name" -a -r "$test_name" -a -r "$test_name/info.txt" ]; then
-    ./run-test ${run_args} "$test_name" &
-    test_pids[i]=$!
-    test_names[test_pids[i]]="$test_name"
-    let i+=1
-  fi
-done
+if [ "$sequental" == "yes" ]; then
+  i=0
+  for test_name in *; do
+    if [ -d "$test_name" -a -r "$test_name" -a -r "$test_name/info.txt" ]; then
+      ./run-test ${run_args} "$test_name"
+      RES=$?
+      test_pids[i]=$i
+      test_names[test_pids[i]]="$test_name"
+      if [ "$RES" != "0" ]; then
+        let failure_count+=1
+        failed_test_names="$failed_test_names ${test_names[i]}"
+      else
+        let succeeded_count+=1
+      fi
+      let i+=1
+    fi
+  done
+else
+  # start all the tests
+  i=0
+  for test_name in *; do
+    if [ -d "$test_name" -a -r "$test_name" -a -r "$test_name/info.txt" ]; then
+      ./run-test ${run_args} "$test_name" &
+      test_pids[i]=$!
+      test_names[test_pids[i]]="$test_name"
+      let i+=1
+    fi
+  done
 
-# wait for all the tests, collecting the failures
-failure_count=0
-succeeded_count=0
-failed_test_names=""
-for pid in ${test_pids[@]}; do
-  wait $pid
-  if [ "$?" != "0" ]; then
-    let failure_count+=1
-    failed_test_names="$failed_test_names ${test_names[$pid]}[pid=$pid]"
-  else
-    let succeeded_count+=1
-  fi
-done
+  # wait for all the tests, collecting the failures
+  failure_count=0
+  succeeded_count=0
+  failed_test_names=""
+  for pid in ${test_pids[@]}; do
+    wait $pid
+    if [ "$?" != "0" ]; then
+      let failure_count+=1
+      failed_test_names="$failed_test_names ${test_names[$pid]}[pid=$pid]"
+    else
+      let succeeded_count+=1
+    fi
+  done
+fi
 
 echo "succeeded tests: $succeeded_count"
 echo "failed tests: $failure_count"
diff --git a/test/run-test b/test/run-test
index 6e59641..34b06cc 100755
--- a/test/run-test
+++ b/test/run-test
@@ -33,7 +33,7 @@
 progdir=`pwd`
 prog="${progdir}"/`basename "${prog}"`
 test_dir="test-$$"
-tmp_dir="/tmp/${test_dir}"
+tmp_dir="/tmp/$USER/${test_dir}"
 
 export JAVA="java"
 export JAVAC="javac -g"
@@ -99,6 +99,11 @@
         option="$1"
         run_args="${run_args} -Xcompiler-option $option"
         shift
+    elif [ "x$1" = "x--runtime-option" ]; then
+        shift
+        option="$1"
+        run_args="${run_args} --runtime-option $option"
+        shift
     elif [ "x$1" = "x--debug" ]; then
         run_args="${run_args} --debug"
         shift
@@ -165,6 +170,7 @@
         break
     fi
 done
+mkdir -p $tmp_dir
 
 if [ ! "$runtime" = "jvm" ]; then
   run_args="${run_args} --lib $lib"
@@ -189,7 +195,7 @@
         fi
         run_args="${run_args} --boot -Ximage:${ANDROID_HOST_OUT}/framework/core.art"
     else
-        run_args="${run_args} --boot -Ximage:/data/art-test${suffix64}/core.art"
+        run_args="${run_args} --boot -Ximage:/data/art-test/core.art"
     fi
 fi
 
@@ -233,6 +239,7 @@
         echo "  Runtime Options:"
         echo "    -O                   Run non-debug rather than debug build (off by default)."
         echo "    -Xcompiler-option    Pass an option to the compiler."
+        echo "    -runtime-option      Pass an option to the runtime."
         echo "    --debug              Wait for a debugger to attach."
         echo "    --gdb                Run under gdb; incompatible with some tests."
         echo "    --build-only         Build test files only (off by default)."
@@ -356,7 +363,7 @@
     cd "$oldwd"
     rm -rf "$tmp_dir"
     if [ "$target_mode" = "yes" -a "$build_exit" = "0" ]; then
-        adb shell rm -r $DEX_LOCATION
+        adb shell rm -rf $DEX_LOCATION
     fi
     exit 0
 fi